Make sure that the RStudio option "Chunk Output in Console" is selected and that "Show Previews Inline" is not selected

Tentative conclusions

  1. Mosaic forest has higher species richness and higher Shannon richness than the monospecies plantations: JC, EC, and BB (marginal diff over BB)
  2. Mosaic forest composition is more similar to Natural forest than any of the monospecies plantations are to Natural forest
  3. The source of the beta diversity in this system is entirely turnover, which means that each of the monospecies plantations has its own set of species

start R analysis

library(ape) #read.tree() 
library(tidyverse) 
## ── Attaching packages ─────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.4
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-5
library(beanplot)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(iNEXT)
library(iNextPD)
## 
## Attaching package: 'iNextPD'
## The following object is masked from 'package:iNEXT':
## 
##     ggiNEXT
library(ade4)
library(boral)
## Loading required package: coda
## If you recently updated boral, please check news(package = "boral") for the updates in the latest version.
library(mvabund)
library(RColorBrewer)
library(betapart)
library(forcats)
library(stringr)
library(SpeciesMix)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: numDeriv
library(beepr)
library(corrplot)
## corrplot 0.84 loaded
library(lulu)

# Record the R version and all attached package versions for
# reproducibility (output follows).
sessionInfo()
## R version 3.3.3 (2017-03-06)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS  10.13.6
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lulu_0.1.0         corrplot_0.84      beepr_1.3         
##  [4] SpeciesMix_0.3.4   numDeriv_2016.8-1  MASS_7.3-50       
##  [7] betapart_1.5.0     RColorBrewer_1.1-2 mvabund_3.12.3    
## [10] boral_1.6.1        coda_0.19-1        ade4_1.7-10       
## [13] iNextPD_0.3.2      iNEXT_2.0.12       car_3.0-0         
## [16] carData_3.0-1      beanplot_1.2       vegan_2.4-5       
## [19] lattice_0.20-35    permute_0.9-4      forcats_0.3.0     
## [22] stringr_1.3.1      dplyr_0.7.4        purrr_0.2.5       
## [25] readr_1.1.1        tidyr_0.8.1        tibble_1.4.2      
## [28] ggplot2_2.2.1      tidyverse_1.2.1    ape_5.0           
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-131      lubridate_1.7.4   httr_1.3.1       
##  [4] rprojroot_1.3-2   tools_3.3.3       backports_1.1.2  
##  [7] R6_2.2.2          R2WinBUGS_2.1-21  lazyeval_0.2.1   
## [10] mgcv_1.8-22       colorspace_1.3-2  mnormt_1.5-5     
## [13] curl_3.2          cli_1.0.0         rvest_0.3.2      
## [16] xml2_1.2.0        scales_0.5.0      mvtnorm_1.0-6    
## [19] psych_1.8.4       digest_0.6.15     foreign_0.8-70   
## [22] rmarkdown_1.9     rio_0.5.10        pkgconfig_2.0.1  
## [25] htmltools_0.3.6   rlang_0.2.1       readxl_1.1.0     
## [28] rstudioapi_0.7    bindr_0.1         jsonlite_1.5     
## [31] zip_1.0.0         magrittr_1.5      Matrix_1.2-12    
## [34] Rcpp_0.12.17      munsell_0.4.3     abind_1.4-5      
## [37] stringi_1.2.2     yaml_2.1.19       plyr_1.8.4       
## [40] fishMod_0.29      grid_3.3.3        parallel_3.3.3   
## [43] crayon_1.3.4      haven_1.1.1       hms_0.4.2        
## [46] knitr_1.20        rcdd_1.2          pillar_1.2.3     
## [49] boot_1.3-20       reshape2_1.4.3    magic_1.5-8      
## [52] R2jags_0.5-7      picante_1.7       glue_1.2.0       
## [55] evaluate_0.10.1   data.table_1.11.4 modelr_0.1.2     
## [58] cellranger_1.1.0  gtable_0.2.0      assertthat_0.2.0 
## [61] openxlsx_4.1.0    tweedie_2.3.2     broom_0.4.4      
## [64] rjags_4-6         audio_0.1-5       geometry_0.3-6   
## [67] bindrcpp_0.2      cluster_2.0.6     statmod_1.4.30

After CROP clustering at 97% similarity

run the following commands in the terminal to generate the match list

cd Documents/kiz/ecec/2014-GFGP/MBC-miseq/Eco_analysis/2014MBC_CROP97_3507otus/1lulu_3507
vsearch --usearch_global 2libs_CROP97.cluster3507.fasta --db 2libs_CROP97.cluster3507.fasta --self --id .84 --iddef 1 --userout match_list.txt --userfields query+target+id --maxaccepts 0 --query_cov .9 --maxhits 10

using match list for lulu

we do phyloseq after lulu

community analyses start from here

# Input: OTU table joined with site metadata and Landsat-derived
# environmental variables, produced by the final bioinformatics pipeline
# (vsearch + RDP Classifier + phyloseq, min 20 reads per OTU).
inputfile <- "./data/2014MBC_44reads_otu543_LandsatEnv.txt" # post phyloseq filtering and filtering for trees, using final bioinformatics pipeline with vsearch and RDP Classifier and phyloseq at min20reads, with landsat data

# command from readr package, with options on formatting the columns.
# Factor levels are declared explicitly so that Habitat/Type/Landsat
# keep a fixed level order downstream; dates are day/month/year.
gfgMB <- read_tsv(
   inputfile, col_names = TRUE, na = "NA",
   col_types = cols(
     Site = col_character(),
     Habitat = col_factor(c("BB", "CL", "EC", "JC", "MF", "NF")),
     Type = col_factor(c("1", "2", "3", "4", "5", "6")),
     Altitude = col_integer(),
     sampling_time = col_date(format = "%d/%m/%Y"),
     weather_value = col_factor(c("cloudy", "sunny", "rainy")),
     Landsat_value = col_factor(c("1", "2", "3", "4", "5", "6")),
     longitude = col_double(),
     latitude = col_double(),
     Elevation_m = col_integer()
     )
 )

# Coerce to a tibble for consistent printing and subsetting.
# (as_tibble() replaces the deprecated tbl_df().)
gfgMB <- as_tibble(gfgMB)

# with(gfgMB, plot(Altitude ~ Elevation_m, col=as.numeric(Habitat)))  # Altitude variable is not reliable;  use Elevation_m instead, which is calculated from DEM

# make environment dataset: site metadata plus environmental covariates
habitat <- gfgMB %>% dplyr::select(Site:Simpson_evenness_index)
habitat <- habitat %>% dplyr::filter(!is.na(Habitat)) # remove taxonomy rows
colnames(habitat)[11:80] <- paste0("x", colnames(habitat)[11:80]) # column names need to start with a letter

# make community dataset: the OTU read-count columns only
community <- gfgMB %>% dplyr::select(starts_with("Cluster"))
# otuvector <- colnames(community)

# Transpose so OTUs are rows and sites are columns. Note that t() on a
# data frame coerces everything to character, hence the numeric
# conversion further down.
community_t <- t(community)
community_t <- as.data.frame(community_t)
community_t <- rownames_to_column(community_t)
colnames(community_t) <- c("otu", gfgMB$Site) # add column names

#communityAll_t <- community_t %>% dplyr::filter(phylum=="Arthropoda")
#communityAll_t <- community_t %>% dplyr::select(-c(kingdom:species))

# Transpose back: sites as rows, OTUs as columns.
communityAll <- t(community_t)
colvector <- communityAll[1,] # make a vector of the first row, which has the otu names
communityAll <- as.data.frame(communityAll)
colnames(communityAll) <-  colvector # add the otu names to the column names
communityAll <- communityAll[-1,] # remove first row, which has the column names

# Convert every column from factor to numeric. Factors cannot be
# converted to numeric directly (that would yield the level codes), so
# convert to character first, then to numeric.
# http://stackoverflow.com/questions/2288485/how-to-convert-a-data-frame-column-to-numeric-type
communityAll <- as.data.frame(lapply(communityAll, function(x) as.numeric(as.character(x))))

visualise a histogram of read number per OTU

##### calculate distribution of read numbers per OTU to set minimum number 
otureads <- c(colSums(communityAll)) # list of the reads per OTU
sum(otureads) ## 1,840,033 reads total (matches the output below)
## [1] 1840033
otureads[otureads>5000] <- 5000 # cap large values at 5000 to make the histogram readable
otuhist <- hist(otureads, breaks = 100)
# label each histogram bar with its count
text(otuhist$mids, otuhist$counts, cex = 0.5, otuhist$counts, adj = c(.5, -.5), col = "blue3")

Starting ecological analysis

Filter out sites with (1) low reads (<= 100), (2) very low numbers of species

# Work on a copy of the full site x OTU matrix.
community <- communityAll
#community <- t(new_table_lulu)
community <- as.data.frame(community)
#sort(colSums(community))
community[community < 5] <- 0 # set small cells to 0 (presumably to drop low-count noise — confirm threshold choice)

# Per-site read totals and species richness, used as filtering criteria
# below. Note: community and habitatN must be filtered with the same
# mask each time so their rows stay aligned.
habitat$rowsums <- rowSums(community)
habitat$sprichness <- specnumber(community, MARGIN = 1) # number of species per site
# keep only sites that have more than 100 reads (removed 2, still remain 68)
community <- community %>% dplyr::filter(habitat$rowsums > 100)
habitatN <- habitat %>% dplyr::filter(habitat$rowsums > 100)
rowSums(community)
##  [1]  12659  33270  36571  17609  10507   2171  32120  14412  28118  12189
## [11]  43303  40239  33982  36947  11959  28858   5511  18808  21798   7691
## [21]   1800  55657  24493   6154  13830  37230  34459   2408  25348  29522
## [31]  41848  40834  22431  46323  49219  20747  47123  69667  13828  10068
## [41]  35544  29235  14434  12513  38770  19366  15117    882  27033   6975
## [51]  22962   6660   1042  28549   1997  20437  37110  39174  57522  93620
## [61]  21563    665  31150  33367  24136  26764 100009  51054
# keep only sites that have >=5 species (removed 68 - 61 = 7 sites)
community <- community %>% dplyr::filter(habitatN$sprichness >= 5)
habitatN <- habitatN %>% dplyr::filter(habitatN$sprichness >= 5)
#sort(colSums(community))
# drop factor levels that no longer occur after site removal
habitatN <- droplevels(habitatN)
# drop OTUs with fewer than 20 reads overall; the removed OTUs are listed below
community <- community[, colSums(community)>=20]
#Cluster31064  Cluster334816  Cluster672922  Cluster932857  Cluster989447  Cluster273565 Cluster1265861
beanplot(rowSums(community)~habitatN$Habitat, col = c("grey", "white"), xlab = "Habitat type", ylab = "Reads number")

# Kampstra, P. Beanplot: A Boxplot Alternative for Visual Comparison of Distributions. Journal of Statistical Software, Code Snippets 28(1). 1-9 (2008) 

nboot <- 999 # set to 999 for publication
# base model with no levels combined
# Test whether the total read number per site differs among habitat types.
reads.glm <- manyglm(rowSums(community) ~ habitatN$Habitat)
plot(reads.glm) # residual diagnostics

anova(reads.glm, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # p = 0.041, df = 5, score = 8.671, nBoot = 999, ~1 min. Per-site read totals differ weakly among habitats (significant at the 0.05 level; see output below)
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.00 minutes...
##  Resampling run 100 finished. Time elapsed: 0.05 minutes...
##  Resampling run 200 finished. Time elapsed: 0.09 minutes...
##  Resampling run 300 finished. Time elapsed: 0.14 minutes...
##  Resampling run 400 finished. Time elapsed: 0.18 minutes...
##  Resampling run 500 finished. Time elapsed: 0.23 minutes...
##  Resampling run 600 finished. Time elapsed: 0.27 minutes...
##  Resampling run 700 finished. Time elapsed: 0.32 minutes...
##  Resampling run 800 finished. Time elapsed: 0.36 minutes...
##  Resampling run 900 finished. Time elapsed: 0.41 minutes...
## Time elapsed: 0 hr 0 min 27 sec
## Analysis of Variance Table
## 
## Model: manyglm(formula = rowSums(community) ~ habitatN$Habitat)
## 
## Multivariate test:
##                  Res.Df Df.diff score Pr(>score)  
## (Intercept)          60                           
## habitatN$Habitat     55       5 8.671      0.041 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments: P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).

we used the taxonomic assignment (RDP classifier) results to build a taxon distribution of all OTUs

#### for otus taxa tree
#install.packages("metacoder")
library(metacoder)
## Loading required package: taxa
## 
## Attaching package: 'taxa'
## The following object is masked from 'package:ggplot2':
## 
##     map_data
## This is metacoder verison 0.2.1 (stable). If you use metacoder for published research, please cite our paper:
## 
## Foster Z, Sharpton T and Grunwald N (2017). "Metacoder: An R package for visualization and manipulation of community taxonomic diversity data." PLOS Computational Biology, 13(2), pp. 1-15. doi: 10.1371/journal.pcbi.1005404
## 
## Enter `citation("metacoder")` for a BibTeX entry for this citation.
## 
## Attaching package: 'metacoder'
## The following object is masked from 'package:ape':
## 
##     complement
library(tibble)

# Input for metacoder: an OTU x sample table carrying an RDP lineage
# string per OTU, plus the matching sample metadata table.
# (header = TRUE rather than the reassignable shorthand T.)
mbc_otus <- read.table("data/forMetacoder/2014MBC_otu536_tax.txt", header = TRUE, sep = "\t")
mbc_samples <- read.table("data/forMetacoder/2014MBC_sample536.txt", header = TRUE, sep = "\t")

mbc_otus[mbc_otus>1] <- 1 # using presence/absence data

#str(mbc_otus)
#str(mbc_samples)
## change data types: read.table loads strings as factors here, so
## coerce the identifier columns back to character
mbc_otus$OTU_id <- as.character(mbc_otus$OTU_id)
mbc_otus$lineage <- as.character(mbc_otus$lineage)
mbc_samples$Site <- as.character(mbc_samples$Site)
mbc_samples$Habitat <- as.character(mbc_samples$Habitat)
##
mbc_otus <- as_tibble(mbc_otus)
mbc_samples <- as_tibble(mbc_samples)
#print(mbc_otus)
#print(mbc_samples)

# Parse the ";"-separated lineage strings (e.g. "p__Arthropoda") into a
# taxonomy: the regex splits each element into a rank code and a name.
obj <- parse_tax_data(mbc_otus, class_cols = "lineage", class_sep = ";",
                      class_key = c(tax_rank = "info", tax_name = "taxon_name"),
                      class_regex = "^(.+)__(.+)$")

# This returns a taxmap object. The taxmap class is designed to store any number of tables, lists, or vectors associated with taxonomic information and facilitate manipulating the data in a cohesive way. Here is what that object looks like:

print(obj) # or click on the object in the Environment pane
## <Taxmap>
##   144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
##   144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
##   2 data sets:
##     tax_data:
##       # A tibble: 536 x 64
##         taxon_id OTU_id  lineage    BB04  BB05  BB06  BB07  BB08
##         <chr>    <chr>   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ao       Cluste… r__root;…     0     0     0     0     0
##       2 ag       Cluste… r__root;…     0     0     0     0     0
##       3 ad       Cluste… r__root;…     0     0     0     0     0
##       # ... with 533 more rows, and 56 more variables:
##       #   BB09 <dbl>, BB10 <dbl>, CL01 <dbl>, CL03 <dbl>,
##       #   CL04 <dbl>, CL05 <dbl>, CL06 <dbl>, CL07 <dbl>,
##       #   CL08 <dbl>, CL09 <dbl>, CL10 <dbl>, CL11 <dbl>,
##       #   CL12 <dbl>, CL13 <dbl>, CL14 <dbl>, CL15 <dbl>,
##       #   CL16 <dbl>, EC01 <dbl>, EC05 <dbl>, EC06 <dbl>,
##       #   EC07 <dbl>, EC08 <dbl>, EC09 <dbl>, EC10 <dbl>,
##       #   JC01 <dbl>, JC02 <dbl>, JC03 <dbl>, JC05 <dbl>,
##       #   JC06 <dbl>, JC07 <dbl>, JC09 <dbl>, JC10 <dbl>,
##       #   JC11 <dbl>, JC12 <dbl>, MF01 <dbl>, MF02 <dbl>,
##       #   MF04 <dbl>, MF05 <dbl>, MF06 <dbl>, MF07 <dbl>,
##       #   MF08 <dbl>, MF09 <dbl>, MF10 <dbl>, NF02 <dbl>,
##       #   NF05 <dbl>, NF06 <dbl>, NF07 <dbl>, NF08 <dbl>,
##       #   NF09 <dbl>, NF10 <dbl>, NF11 <dbl>, NF12 <dbl>,
##       #   NF13 <dbl>, NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
##     class_data:
##       # A tibble: 2,112 x 5
##         taxon_id input_index tax_rank tax_name   regex_match  
##         <chr>          <int> <chr>    <chr>      <chr>        
##       1 ab                 1 r        root       r__root      
##       2 ac                 1 p        Arthropoda p__Arthropoda
##       3 ad                 1 c        Insecta    c__Insecta   
##       # ... with 2,109 more rows
##   0 functions:
# accounting for un-even sampling: convert each sample's counts to
# within-sample proportions
obj$data$tax_data <- calc_obs_props(obj, "tax_data")
## No `cols` specified, so using all numeric columns:
##    BB04, BB05, BB06, BB07, BB08 ... NF12, NF13, NF14, NF15, NF16
## Calculating proportions from counts for 61 columns for 536 observations.
print(obj)
## <Taxmap>
##   144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
##   144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
##   2 data sets:
##     tax_data:
##       # A tibble: 536 x 62
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09   BB10
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
##       1 ao           0     0     0     0     0     0 0     
##       2 ag           0     0     0     0     0     0 0.0476
##       3 ad           0     0     0     0     0     0 0     
##       # ... with 533 more rows, and 54 more variables:
##       #   CL01 <dbl>, CL03 <dbl>, CL04 <dbl>, CL05 <dbl>,
##       #   CL06 <dbl>, CL07 <dbl>, CL08 <dbl>, CL09 <dbl>,
##       #   CL10 <dbl>, CL11 <dbl>, CL12 <dbl>, CL13 <dbl>,
##       #   CL14 <dbl>, CL15 <dbl>, CL16 <dbl>, EC01 <dbl>,
##       #   EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
##       #   EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
##       #   JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
##       #   JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
##       #   MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
##       #   MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
##       #   MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
##       #   NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
##       #   NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
##       #   NF15 <dbl>, NF16 <dbl>
##     class_data:
##       # A tibble: 2,112 x 5
##         taxon_id input_index tax_rank tax_name   regex_match  
##         <chr>          <int> <chr>    <chr>      <chr>        
##       1 ab                 1 r        root       r__root      
##       2 ac                 1 p        Arthropoda p__Arthropoda
##       3 ad                 1 c        Insecta    c__Insecta   
##       # ... with 2,109 more rows
##   0 functions:
# Getting per-taxon information
# Currently, we have values for the abundance of each OTU, not each taxon. To get information on the taxa, we can sum the abundance per-taxon like so:
# (adds a tax_abund table: one row per taxon, one column per site)
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data",
                                       cols = mbc_samples$Site)
## Summing per-taxon counts from 61 columns for 144 taxa
print(obj)
## <Taxmap>
##   144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
##   144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
##   3 data sets:
##     tax_data:
##       # A tibble: 536 x 62
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09   BB10
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
##       1 ao           0     0     0     0     0     0 0     
##       2 ag           0     0     0     0     0     0 0.0476
##       3 ad           0     0     0     0     0     0 0     
##       # ... with 533 more rows, and 54 more variables:
##       #   CL01 <dbl>, CL03 <dbl>, CL04 <dbl>, CL05 <dbl>,
##       #   CL06 <dbl>, CL07 <dbl>, CL08 <dbl>, CL09 <dbl>,
##       #   CL10 <dbl>, CL11 <dbl>, CL12 <dbl>, CL13 <dbl>,
##       #   CL14 <dbl>, CL15 <dbl>, CL16 <dbl>, EC01 <dbl>,
##       #   EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
##       #   EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
##       #   JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
##       #   JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
##       #   MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
##       #   MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
##       #   MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
##       #   NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
##       #   NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
##       #   NF15 <dbl>, NF16 <dbl>
##     class_data:
##       # A tibble: 2,112 x 5
##         taxon_id input_index tax_rank tax_name   regex_match  
##         <chr>          <int> <chr>    <chr>      <chr>        
##       1 ab                 1 r        root       r__root      
##       2 ac                 1 p        Arthropoda p__Arthropoda
##       3 ad                 1 c        Insecta    c__Insecta   
##       # ... with 2,109 more rows
##     tax_abund:
##       # A tibble: 144 x 62
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09  BB10  CL01
##       * <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ab       1     1     1     1     1     1     1         1
##       2 ac       1     1     1     1     1     1     1         1
##       3 ad       0.955 0.812 0.864 0.944 0.571 0.808 0.905     1
##       # ... with 141 more rows, and 53 more variables:
##       #   CL03 <dbl>, CL04 <dbl>, CL05 <dbl>, CL06 <dbl>,
##       #   CL07 <dbl>, CL08 <dbl>, CL09 <dbl>, CL10 <dbl>,
##       #   CL11 <dbl>, CL12 <dbl>, CL13 <dbl>, CL14 <dbl>,
##       #   CL15 <dbl>, CL16 <dbl>, EC01 <dbl>, EC05 <dbl>,
##       #   EC06 <dbl>, EC07 <dbl>, EC08 <dbl>, EC09 <dbl>,
##       #   EC10 <dbl>, JC01 <dbl>, JC02 <dbl>, JC03 <dbl>,
##       #   JC05 <dbl>, JC06 <dbl>, JC07 <dbl>, JC09 <dbl>,
##       #   JC10 <dbl>, JC11 <dbl>, JC12 <dbl>, MF01 <dbl>,
##       #   MF02 <dbl>, MF04 <dbl>, MF05 <dbl>, MF06 <dbl>,
##       #   MF07 <dbl>, MF08 <dbl>, MF09 <dbl>, MF10 <dbl>,
##       #   NF02 <dbl>, NF05 <dbl>, NF06 <dbl>, NF07 <dbl>,
##       #   NF08 <dbl>, NF09 <dbl>, NF10 <dbl>, NF11 <dbl>,
##       #   NF12 <dbl>, NF13 <dbl>, NF14 <dbl>, NF15 <dbl>,
##       #   NF16 <dbl>
##   0 functions:
# Note that there is now an additional table with one row per taxon.
# We can also easily calculate the number of samples have reads for each taxon,
# grouped by habitat type (one count column per habitat):
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = mbc_samples$Habitat)
## No `cols` specified, so using all numeric columns:
##    BB04, BB05, BB06, BB07, BB08 ... NF12, NF13, NF14, NF15, NF16
## Calculating number of samples with non-zero counts from 61 columns in 6 groups for 144 observations
print(obj)
## <Taxmap>
##   144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
##   144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
##   4 data sets:
##     tax_data:
##       # A tibble: 536 x 62
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09   BB10
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
##       1 ao           0     0     0     0     0     0 0     
##       2 ag           0     0     0     0     0     0 0.0476
##       3 ad           0     0     0     0     0     0 0     
##       # ... with 533 more rows, and 54 more variables:
##       #   CL01 <dbl>, CL03 <dbl>, CL04 <dbl>, CL05 <dbl>,
##       #   CL06 <dbl>, CL07 <dbl>, CL08 <dbl>, CL09 <dbl>,
##       #   CL10 <dbl>, CL11 <dbl>, CL12 <dbl>, CL13 <dbl>,
##       #   CL14 <dbl>, CL15 <dbl>, CL16 <dbl>, EC01 <dbl>,
##       #   EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
##       #   EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
##       #   JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
##       #   JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
##       #   MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
##       #   MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
##       #   MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
##       #   NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
##       #   NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
##       #   NF15 <dbl>, NF16 <dbl>
##     class_data:
##       # A tibble: 2,112 x 5
##         taxon_id input_index tax_rank tax_name   regex_match  
##         <chr>          <int> <chr>    <chr>      <chr>        
##       1 ab                 1 r        root       r__root      
##       2 ac                 1 p        Arthropoda p__Arthropoda
##       3 ad                 1 c        Insecta    c__Insecta   
##       # ... with 2,109 more rows
##     tax_abund:
##       # A tibble: 144 x 62
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09  BB10  CL01
##       * <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ab       1     1     1     1     1     1     1         1
##       2 ac       1     1     1     1     1     1     1         1
##       3 ad       0.955 0.812 0.864 0.944 0.571 0.808 0.905     1
##       # ... with 141 more rows, and 53 more variables:
##       #   CL03 <dbl>, CL04 <dbl>, CL05 <dbl>, CL06 <dbl>,
##       #   CL07 <dbl>, CL08 <dbl>, CL09 <dbl>, CL10 <dbl>,
##       #   CL11 <dbl>, CL12 <dbl>, CL13 <dbl>, CL14 <dbl>,
##       #   CL15 <dbl>, CL16 <dbl>, EC01 <dbl>, EC05 <dbl>,
##       #   EC06 <dbl>, EC07 <dbl>, EC08 <dbl>, EC09 <dbl>,
##       #   EC10 <dbl>, JC01 <dbl>, JC02 <dbl>, JC03 <dbl>,
##       #   JC05 <dbl>, JC06 <dbl>, JC07 <dbl>, JC09 <dbl>,
##       #   JC10 <dbl>, JC11 <dbl>, JC12 <dbl>, MF01 <dbl>,
##       #   MF02 <dbl>, MF04 <dbl>, MF05 <dbl>, MF06 <dbl>,
##       #   MF07 <dbl>, MF08 <dbl>, MF09 <dbl>, MF10 <dbl>,
##       #   NF02 <dbl>, NF05 <dbl>, NF06 <dbl>, NF07 <dbl>,
##       #   NF08 <dbl>, NF09 <dbl>, NF10 <dbl>, NF11 <dbl>,
##       #   NF12 <dbl>, NF13 <dbl>, NF14 <dbl>, NF15 <dbl>,
##       #   NF16 <dbl>
##     tax_occ:
##       # A tibble: 144 x 7
##         taxon_id    BB    CL    EC    JC    MF    NF
##         <chr>    <int> <int> <int> <int> <int> <int>
##       1 ab           7    15     7    10     9    13
##       2 ac           7    15     7    10     9    13
##       3 ad           7    15     7    10     9    13
##       # ... with 141 more rows
##   0 functions:
# Plotting taxonomic data
# Now that we have per-taxon information, we can plot the information using heat trees. The code below plots the number of Natural forest (NF) samples that have reads for each taxon. It also plots the number of OTUs assigned to each taxon in the overall dataset.

# node size = OTUs per taxon; node colour = NF sample occupancy
heat_tree(obj, 
          node_label = obj$taxon_names(),
          node_size = obj$n_obs(),
          node_color = obj$data$tax_occ$NF, 
          node_size_axis_label = "OTU count",
          node_color_axis_label = "Samples with reads")

# Comparing any number of treatments/groups
# (pairwise habitat comparisons of per-taxon proportions; the resulting
# diff_table includes a Wilcoxon p-value per taxon and habitat pair)
obj$data$diff_table <- compare_groups(obj, dataset = "tax_abund",
                                      cols = mbc_samples$Site,
                                      groups = mbc_samples$Habitat)
print(obj$data$diff_table)
## # A tibble: 2,160 x 7
##    taxon_id treatment_1 treatment_2 log2_median_rat… median_diff mean_diff
##  * <chr>    <chr>       <chr>                  <dbl>       <dbl>     <dbl>
##  1 ab       BB          CL                     0          0         0     
##  2 ac       BB          CL                     0          0         0     
##  3 ad       BB          CL                    -0.153     -0.0964   -0.111 
##  4 ae       BB          CL                     0.856      0.0213    0.0710
##  5 af       BB          CL                     0          0        -0.0141
##  6 ag       BB          CL                     0.495      0.0264    0.0143
##  7 ah       BB          CL                     0.856      0.0213    0.0710
##  8 ai       BB          CL                    -0.737     -0.182    -0.163 
##  9 aj       BB          CL                     0          0         0.0168
## 10 ak       BB          CL                  -Inf         -0.0645   -0.0643
## # ... with 2,150 more rows, and 1 more variable: wilcox_p_value <dbl>
# Matrix of heat trees: one tree per habitat pair, coloured by the
# log2 ratio of median per-taxon proportions from diff_table above.
heat_tree_matrix(obj,
                 dataset = "diff_table",
                 node_size = n_obs,
                 node_label = taxon_names,
                 node_color = log2_median_ratio,
                 node_color_range = diverging_palette(),
                 node_color_trans = "linear",
                 node_color_interval = c(-3, 3),
                 edge_color_interval = c(-3, 3),
                 node_size_axis_label = "Number of OTUs",
                 node_color_axis_label = "Log2 ratio median proportions")

# page(compare_groups) #check codes of compare_groups, change "median" to "mean"

##### remove the CL group to compare differences among the forest habitats only
otus_without_CL <- mbc_otus %>% dplyr::select(-c(CL01:CL16))
samples_without_CL <- mbc_samples %>% dplyr::filter(mbc_samples$Habitat %in% c("BB", "EC", "JC", "MF", "NF"))

print(otus_without_CL)
## # A tibble: 536 x 48
##    OTU_id  lineage    BB04  BB05  BB06  BB07  BB08  BB09  BB10  EC01  EC05
##    <chr>   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Cluste… r__root;…     0     0     0     0     0     0     0     0     1
##  2 Cluste… r__root;…     0     0     0     0     0     0     1     0     1
##  3 Cluste… r__root;…     0     0     0     0     0     0     0     0     0
##  4 Cluste… r__root;…     0     0     0     0     0     0     0     0     0
##  5 Cluste… r__root;…     0     0     0     0     0     1     0     0     0
##  6 Cluste… r__root;…     0     0     0     0     0     0     0     0     0
##  7 Cluste… r__root;…     0     0     0     0     0     0     0     0     0
##  8 Cluste… r__root;…     0     0     0     0     0     0     0     0     0
##  9 Cluste… r__root;…     0     0     0     0     0     0     0     0     0
## 10 Cluste… r__root;…     0     0     0     0     0     0     0     0     0
## # ... with 526 more rows, and 37 more variables: EC06 <dbl>, EC07 <dbl>,
## #   EC08 <dbl>, EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
## #   JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>, JC09 <dbl>,
## #   JC10 <dbl>, JC11 <dbl>, JC12 <dbl>, MF01 <dbl>, MF02 <dbl>,
## #   MF04 <dbl>, MF05 <dbl>, MF06 <dbl>, MF07 <dbl>, MF08 <dbl>,
## #   MF09 <dbl>, MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
## #   NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>, NF11 <dbl>,
## #   NF12 <dbl>, NF13 <dbl>, NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
print(samples_without_CL)
## # A tibble: 46 x 4
##    Site  Habitat  Type Altitude
##    <chr> <chr>   <int>    <int>
##  1 BB04  BB          1      900
##  2 BB05  BB          1      800
##  3 BB06  BB          1      850
##  4 BB07  BB          1      800
##  5 BB08  BB          1      750
##  6 BB09  BB          1      800
##  7 BB10  BB          1      800
##  8 EC01  EC          3      500
##  9 EC05  EC          3      600
## 10 EC06  EC          3      600
## # ... with 36 more rows
#str(otus_without_CL)
#str(samples_without_CL)
# Rebuild the taxmap object for the CL-free subset, using the same
# lineage parsing as for the full dataset above.
objF <- parse_tax_data(otus_without_CL, class_cols = "lineage", class_sep = ";",
                      class_key = c(tax_rank = "info", tax_name = "taxon_name"),
                      class_regex = "^(.+)__(.+)$")

# This returns a taxmap object. The taxmap class is designed to store any number of tables, lists, or vectors associated with taxonomic information and facilitate manipulating the data in a cohesive way. Here is what that object looks like:

print(objF) # or click on the object in the Environment pane
## <Taxmap>
##   144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
##   144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
##   2 data sets:
##     tax_data:
##       # A tibble: 536 x 49
##         taxon_id OTU_id  lineage    BB04  BB05  BB06  BB07  BB08
##         <chr>    <chr>   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ao       Cluste… r__root;…     0     0     0     0     0
##       2 ag       Cluste… r__root;…     0     0     0     0     0
##       3 ad       Cluste… r__root;…     0     0     0     0     0
##       # ... with 533 more rows, and 41 more variables:
##       #   BB09 <dbl>, BB10 <dbl>, EC01 <dbl>, EC05 <dbl>,
##       #   EC06 <dbl>, EC07 <dbl>, EC08 <dbl>, EC09 <dbl>,
##       #   EC10 <dbl>, JC01 <dbl>, JC02 <dbl>, JC03 <dbl>,
##       #   JC05 <dbl>, JC06 <dbl>, JC07 <dbl>, JC09 <dbl>,
##       #   JC10 <dbl>, JC11 <dbl>, JC12 <dbl>, MF01 <dbl>,
##       #   MF02 <dbl>, MF04 <dbl>, MF05 <dbl>, MF06 <dbl>,
##       #   MF07 <dbl>, MF08 <dbl>, MF09 <dbl>, MF10 <dbl>,
##       #   NF02 <dbl>, NF05 <dbl>, NF06 <dbl>, NF07 <dbl>,
##       #   NF08 <dbl>, NF09 <dbl>, NF10 <dbl>, NF11 <dbl>,
##       #   NF12 <dbl>, NF13 <dbl>, NF14 <dbl>, NF15 <dbl>,
##       #   NF16 <dbl>
##     class_data:
##       # A tibble: 2,112 x 5
##         taxon_id input_index tax_rank tax_name   regex_match  
##         <chr>          <int> <chr>    <chr>      <chr>        
##       1 ab                 1 r        root       r__root      
##       2 ac                 1 p        Arthropoda p__Arthropoda
##       3 ad                 1 c        Insecta    c__Insecta   
##       # ... with 2,109 more rows
##   0 functions:
# removing low abundance counts
#objF$data$tax_data <- zero_low_counts(objF, "tax_data", min_count = 5)
#no_reads <- rowSums(objF$data$tax_data[, samples_without_CL$Site]) == 0
#sum(no_reads)
#objF <- filter_obs(objF, "tax_data", ! no_reads, drop_taxa = TRUE)
#print(objF)

# accounting for un-even sampling
# calc_obs_props() converts per-sample read counts into within-sample proportions,
# so samples with different sequencing depths become comparable.
objF$data$tax_data <- calc_obs_props(objF, "tax_data")
## No `cols` specified, so using all numeric columns:
##    BB04, BB05, BB06, BB07, BB08 ... NF12, NF13, NF14, NF15, NF16
## Calculating proportions from counts for 46 columns for 536 observations.
print(objF)
## <Taxmap>
##   144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
##   144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
##   2 data sets:
##     tax_data:
##       # A tibble: 536 x 47
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09   BB10
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
##       1 ao           0     0     0     0     0     0 0     
##       2 ag           0     0     0     0     0     0 0.0476
##       3 ad           0     0     0     0     0     0 0     
##       # ... with 533 more rows, and 39 more variables:
##       #   EC01 <dbl>, EC05 <dbl>, EC06 <dbl>, EC07 <dbl>,
##       #   EC08 <dbl>, EC09 <dbl>, EC10 <dbl>, JC01 <dbl>,
##       #   JC02 <dbl>, JC03 <dbl>, JC05 <dbl>, JC06 <dbl>,
##       #   JC07 <dbl>, JC09 <dbl>, JC10 <dbl>, JC11 <dbl>,
##       #   JC12 <dbl>, MF01 <dbl>, MF02 <dbl>, MF04 <dbl>,
##       #   MF05 <dbl>, MF06 <dbl>, MF07 <dbl>, MF08 <dbl>,
##       #   MF09 <dbl>, MF10 <dbl>, NF02 <dbl>, NF05 <dbl>,
##       #   NF06 <dbl>, NF07 <dbl>, NF08 <dbl>, NF09 <dbl>,
##       #   NF10 <dbl>, NF11 <dbl>, NF12 <dbl>, NF13 <dbl>,
##       #   NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
##     class_data:
##       # A tibble: 2,112 x 5
##         taxon_id input_index tax_rank tax_name   regex_match  
##         <chr>          <int> <chr>    <chr>      <chr>        
##       1 ab                 1 r        root       r__root      
##       2 ac                 1 p        Arthropoda p__Arthropoda
##       3 ad                 1 c        Insecta    c__Insecta   
##       # ... with 2,109 more rows
##   0 functions:
# Getting per-taxon information
# Currently, we have values for the abundance of each OTU, not each taxon. To get information on the taxa, we can sum the abundance per-taxon like so:
# `cols` restricts the summation to the CL-excluded sample columns
# listed in samples_without_CL$Site (46 sites).
objF$data$tax_abund <- calc_taxon_abund(objF, "tax_data",
                                       cols = samples_without_CL$Site)
## Summing per-taxon counts from 46 columns for 144 taxa
print(objF)
## <Taxmap>
##   144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
##   144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
##   3 data sets:
##     tax_data:
##       # A tibble: 536 x 47
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09   BB10
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
##       1 ao           0     0     0     0     0     0 0     
##       2 ag           0     0     0     0     0     0 0.0476
##       3 ad           0     0     0     0     0     0 0     
##       # ... with 533 more rows, and 39 more variables:
##       #   EC01 <dbl>, EC05 <dbl>, EC06 <dbl>, EC07 <dbl>,
##       #   EC08 <dbl>, EC09 <dbl>, EC10 <dbl>, JC01 <dbl>,
##       #   JC02 <dbl>, JC03 <dbl>, JC05 <dbl>, JC06 <dbl>,
##       #   JC07 <dbl>, JC09 <dbl>, JC10 <dbl>, JC11 <dbl>,
##       #   JC12 <dbl>, MF01 <dbl>, MF02 <dbl>, MF04 <dbl>,
##       #   MF05 <dbl>, MF06 <dbl>, MF07 <dbl>, MF08 <dbl>,
##       #   MF09 <dbl>, MF10 <dbl>, NF02 <dbl>, NF05 <dbl>,
##       #   NF06 <dbl>, NF07 <dbl>, NF08 <dbl>, NF09 <dbl>,
##       #   NF10 <dbl>, NF11 <dbl>, NF12 <dbl>, NF13 <dbl>,
##       #   NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
##     class_data:
##       # A tibble: 2,112 x 5
##         taxon_id input_index tax_rank tax_name   regex_match  
##         <chr>          <int> <chr>    <chr>      <chr>        
##       1 ab                 1 r        root       r__root      
##       2 ac                 1 p        Arthropoda p__Arthropoda
##       3 ad                 1 c        Insecta    c__Insecta   
##       # ... with 2,109 more rows
##     tax_abund:
##       # A tibble: 144 x 47
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09  BB10  EC01
##       * <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ab       1     1     1     1     1     1     1         1
##       2 ac       1     1     1     1     1     1     1         1
##       3 ad       0.955 0.812 0.864 0.944 0.571 0.808 0.905     1
##       # ... with 141 more rows, and 38 more variables:
##       #   EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
##       #   EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
##       #   JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
##       #   JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
##       #   MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
##       #   MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
##       #   MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
##       #   NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
##       #   NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
##       #   NF15 <dbl>, NF16 <dbl>
##   0 functions:
# Note that there is now an additional table with one row per taxon.
# We can also easily calculate the number of samples have reads for each taxon:
# `groups = samples_without_CL$Habitat` collapses the 46 sample columns into one
# count column per habitat (BB, EC, JC, MF, NF), as shown in tax_occ below.
objF$data$tax_occ <- calc_n_samples(objF, "tax_abund", groups = samples_without_CL$Habitat)
## No `cols` specified, so using all numeric columns:
##    BB04, BB05, BB06, BB07, BB08 ... NF12, NF13, NF14, NF15, NF16
## Calculating number of samples with non-zero counts from 46 columns in 5 groups for 144 observations
print(objF)
## <Taxmap>
##   144 taxa: ab. root, ac. Arthropoda ... fo. Paratalanta
##   144 edges: NA->ab, ab->ac, ac->ad ... cn->fm, au->fn, cc->fo
##   4 data sets:
##     tax_data:
##       # A tibble: 536 x 47
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09   BB10
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
##       1 ao           0     0     0     0     0     0 0     
##       2 ag           0     0     0     0     0     0 0.0476
##       3 ad           0     0     0     0     0     0 0     
##       # ... with 533 more rows, and 39 more variables:
##       #   EC01 <dbl>, EC05 <dbl>, EC06 <dbl>, EC07 <dbl>,
##       #   EC08 <dbl>, EC09 <dbl>, EC10 <dbl>, JC01 <dbl>,
##       #   JC02 <dbl>, JC03 <dbl>, JC05 <dbl>, JC06 <dbl>,
##       #   JC07 <dbl>, JC09 <dbl>, JC10 <dbl>, JC11 <dbl>,
##       #   JC12 <dbl>, MF01 <dbl>, MF02 <dbl>, MF04 <dbl>,
##       #   MF05 <dbl>, MF06 <dbl>, MF07 <dbl>, MF08 <dbl>,
##       #   MF09 <dbl>, MF10 <dbl>, NF02 <dbl>, NF05 <dbl>,
##       #   NF06 <dbl>, NF07 <dbl>, NF08 <dbl>, NF09 <dbl>,
##       #   NF10 <dbl>, NF11 <dbl>, NF12 <dbl>, NF13 <dbl>,
##       #   NF14 <dbl>, NF15 <dbl>, NF16 <dbl>
##     class_data:
##       # A tibble: 2,112 x 5
##         taxon_id input_index tax_rank tax_name   regex_match  
##         <chr>          <int> <chr>    <chr>      <chr>        
##       1 ab                 1 r        root       r__root      
##       2 ac                 1 p        Arthropoda p__Arthropoda
##       3 ad                 1 c        Insecta    c__Insecta   
##       # ... with 2,109 more rows
##     tax_abund:
##       # A tibble: 144 x 47
##         taxon_id  BB04  BB05  BB06  BB07  BB08  BB09  BB10  EC01
##       * <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ab       1     1     1     1     1     1     1         1
##       2 ac       1     1     1     1     1     1     1         1
##       3 ad       0.955 0.812 0.864 0.944 0.571 0.808 0.905     1
##       # ... with 141 more rows, and 38 more variables:
##       #   EC05 <dbl>, EC06 <dbl>, EC07 <dbl>, EC08 <dbl>,
##       #   EC09 <dbl>, EC10 <dbl>, JC01 <dbl>, JC02 <dbl>,
##       #   JC03 <dbl>, JC05 <dbl>, JC06 <dbl>, JC07 <dbl>,
##       #   JC09 <dbl>, JC10 <dbl>, JC11 <dbl>, JC12 <dbl>,
##       #   MF01 <dbl>, MF02 <dbl>, MF04 <dbl>, MF05 <dbl>,
##       #   MF06 <dbl>, MF07 <dbl>, MF08 <dbl>, MF09 <dbl>,
##       #   MF10 <dbl>, NF02 <dbl>, NF05 <dbl>, NF06 <dbl>,
##       #   NF07 <dbl>, NF08 <dbl>, NF09 <dbl>, NF10 <dbl>,
##       #   NF11 <dbl>, NF12 <dbl>, NF13 <dbl>, NF14 <dbl>,
##       #   NF15 <dbl>, NF16 <dbl>
##     tax_occ:
##       # A tibble: 144 x 6
##         taxon_id    BB    EC    JC    MF    NF
##         <chr>    <int> <int> <int> <int> <int>
##       1 ab           7     7    10     9    13
##       2 ac           7     7    10     9    13
##       3 ad           7     7    10     9    13
##       # ... with 141 more rows
##   0 functions:
# Plotting taxonomic data
# Now that we have per-taxon information, we can plot the information using heat trees. The code below plots the number of “Nose” samples that have reads for each taxon. It also plots the number of OTUs assigned to each taxon in the overall dataset.

# Node colour uses the NF (Natural forest) column of tax_occ, i.e. how many NF
# samples contain each taxon; node size is the number of OTUs per taxon.
heat_tree(objF, 
          node_label = objF$taxon_names(),
          node_size = objF$n_obs(),
          node_color = objF$data$tax_occ$NF, 
          node_size_axis_label = "OTU count",
          node_color_axis_label = "Samples with reads")

# Comparing any number of treatments/groups
# compare_groups() contrasts taxon abundances between every pair of habitats
# (5 habitats -> 10 pairs x 144 taxa = 1,440 rows in diff_table).
objF$data$diff_table <- compare_groups(objF, dataset = "tax_abund",
                                      cols = samples_without_CL$Site,
                                      groups = samples_without_CL$Habitat)
print(objF$data$diff_table)
## # A tibble: 1,440 x 7
##    taxon_id treatment_1 treatment_2 log2_median_rat… median_diff mean_diff
##  * <chr>    <chr>       <chr>                  <dbl>       <dbl>     <dbl>
##  1 ab       BB          EC                    0           0        0      
##  2 ac       BB          EC                    0           0        0      
##  3 ad       BB          EC                   -0.0860     -0.0530  -0.0778 
##  4 ae       BB          EC                   -0.585      -0.0238   0.0236 
##  5 af       BB          EC                    0           0       -0.0152 
##  6 ag       BB          EC                   -1.87       -0.242   -0.209  
##  7 ah       BB          EC                   -0.585      -0.0238   0.0236 
##  8 ai       BB          EC                    1.13        0.148    0.147  
##  9 aj       BB          EC                    0           0       -0.0130 
## 10 ak       BB          EC                    0           0       -0.00595
## # ... with 1,430 more rows, and 1 more variable: wilcox_p_value <dbl>
# NOTE(review): wilcox_p_value appears to be raw (unadjusted) here — consider a
# multiple-comparison correction before interpreting individual taxa.
heat_tree_matrix(objF,
                 dataset = "diff_table",
                 node_size = n_obs,
                 node_label = taxon_names,
                 node_color = log2_median_ratio,
                 node_color_range = diverging_palette(),
                 node_color_trans = "linear",
                 node_color_interval = c(-3, 3),
                 edge_color_interval = c(-3, 3),
                 node_size_axis_label = "Number of OTUs",
                 node_color_axis_label = "Log2 ratio median proportions")

Alpha Diversity

# Convert the OTU table to presence/absence (any count > 1 becomes 1) so that
# richness is a simple row sum of the binary matrix.
communityB <- community
communityB[communityB>1] <- 1 # binary
rownames(communityB) # 61 rows = sites
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14"
## [15] "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28"
## [29] "29" "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42"
## [43] "43" "44" "45" "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56"
## [57] "57" "58" "59" "60" "61"
#beanplot(specnumber(communityB)~habitatN$Habitat, col = c("grey", "white"))
beanplot(specnumber(communityB)~habitatN$Habitat, col = c("grey", "white"), xlab = "Habitat type", ylab = "Species richness")

# Split the binary community matrix into one data frame per habitat type,
# using the matching rows of habitatN$Habitat.
BB <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("BB"))
CL <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("CL"))
EC <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("EC"))
JC <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("JC"))
MF <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("MF"))
NF <- communityB %>% dplyr::filter(habitatN$Habitat %in% c("NF"))

# Observed species richness per site: row sums of a presence/absence matrix.
BB_obs <- rowSums(BB)
CL_obs <- rowSums(CL)
EC_obs <- rowSums(EC)
JC_obs <- rowSums(JC)
MF_obs <- rowSums(MF)
NF_obs <- rowSums(NF)

# Pairwise Welch two-sample t-tests on per-site observed richness, one test per
# habitat pair (15 pairs). The raw p-values are collected below for FDR correction.
t.test(BB_obs, CL_obs)
## 
##  Welch Two Sample t-test
## 
## data:  BB_obs and CL_obs
## t = -2.4614, df = 17.68, p-value = 0.02437
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -15.473257  -1.212457
## sample estimates:
## mean of x mean of y 
##  18.85714  27.20000
t.test(BB_obs, EC_obs)
## 
##  Welch Two Sample t-test
## 
## data:  BB_obs and EC_obs
## t = 1.2365, df = 11.86, p-value = 0.2402
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.276104 11.847533
## sample estimates:
## mean of x mean of y 
##  18.85714  14.57143
t.test(BB_obs, JC_obs)
## 
##  Welch Two Sample t-test
## 
## data:  BB_obs and JC_obs
## t = 1.7179, df = 14.006, p-value = 0.1078
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.355966 12.270252
## sample estimates:
## mean of x mean of y 
##  18.85714  13.40000
t.test(BB_obs, MF_obs)
## 
##  Welch Two Sample t-test
## 
## data:  BB_obs and MF_obs
## t = 0.28826, df = 13.88, p-value = 0.7774
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.241872  8.178380
## sample estimates:
## mean of x mean of y 
##  18.85714  17.88889
t.test(BB_obs, NF_obs)
## 
##  Welch Two Sample t-test
## 
## data:  BB_obs and NF_obs
## t = -1.3026, df = 17.574, p-value = 0.2095
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -15.464041   3.639866
## sample estimates:
## mean of x mean of y 
##  18.85714  24.76923
t.test(CL_obs, EC_obs)
## 
##  Welch Two Sample t-test
## 
## data:  CL_obs and EC_obs
## t = 3.5305, df = 16.24, p-value = 0.002725
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   5.054716 20.202427
## sample estimates:
## mean of x mean of y 
##  27.20000  14.57143
t.test(CL_obs, JC_obs)
## 
##  Welch Two Sample t-test
## 
## data:  CL_obs and JC_obs
## t = 4.1851, df = 22.811, p-value = 0.0003602
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   6.975625 20.624375
## sample estimates:
## mean of x mean of y 
##      27.2      13.4
t.test(CL_obs, MF_obs)
## 
##  Welch Two Sample t-test
## 
## data:  CL_obs and MF_obs
## t = 2.6807, df = 20.549, p-value = 0.01416
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   2.078197 16.544025
## sample estimates:
## mean of x mean of y 
##  27.20000  17.88889
t.test(CL_obs, NF_obs)
## 
##  Welch Two Sample t-test
## 
## data:  CL_obs and NF_obs
## t = 0.52569, df = 20.725, p-value = 0.6047
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.193017 12.054556
## sample estimates:
## mean of x mean of y 
##  27.20000  24.76923
t.test(EC_obs, JC_obs)
## 
##  Welch Two Sample t-test
## 
## data:  EC_obs and JC_obs
## t = 0.34698, df = 13.14, p-value = 0.7341
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.114172  8.457030
## sample estimates:
## mean of x mean of y 
##  14.57143  13.40000
t.test(EC_obs, MF_obs)
## 
##  Welch Two Sample t-test
## 
## data:  EC_obs and MF_obs
## t = -0.935, df = 13.446, p-value = 0.3663
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -10.95686   4.32194
## sample estimates:
## mean of x mean of y 
##  14.57143  17.88889
t.test(EC_obs, NF_obs)
## 
##  Welch Two Sample t-test
## 
## data:  EC_obs and NF_obs
## t = -2.1789, df = 17.931, p-value = 0.04293
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -20.0335211  -0.3620833
## sample estimates:
## mean of x mean of y 
##  14.57143  24.76923
t.test(JC_obs, MF_obs)
## 
##  Welch Two Sample t-test
## 
## data:  JC_obs and MF_obs
## t = -1.3744, df = 16.518, p-value = 0.1877
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.394902   2.417124
## sample estimates:
## mean of x mean of y 
##  13.40000  17.88889
t.test(JC_obs, NF_obs)
## 
##  Welch Two Sample t-test
## 
## data:  JC_obs and NF_obs
## t = -2.5433, df = 18.265, p-value = 0.02023
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -20.751158  -1.987304
## sample estimates:
## mean of x mean of y 
##  13.40000  24.76923
t.test(MF_obs, NF_obs)
## 
##  Welch Two Sample t-test
## 
## data:  MF_obs and NF_obs
## t = -1.4952, df = 18.868, p-value = 0.1514
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -16.516129   2.755445
## sample estimates:
## mean of x mean of y 
##  17.88889  24.76923
# FDR (Benjamini-Hochberg) correction of the 15 pairwise Welch-test p-values on
# observed richness. Pair order (matching the t-tests above):
# BB_CL, BB_EC, BB_JC, BB_MF, BB_NF, CL_EC, CL_JC, CL_MF, CL_NF,
# EC_JC, EC_MF, EC_NF, JC_MF, JC_NF, MF_NF
p_values <- c(0.0244, 0.2402, 0.1078, 0.7774, 0.2095, 0.0027, 0.0004, 0.0142, 0.6047, 0.7341, 0.3663, 0.0429, 0.1877, 0.0202, 0.1514)
# p.adjust()'s `n` argument already defaults to length(p), so passing it is redundant
p_values.corr.fdr <- p.adjust(p_values, method = "fdr")
p_values.corr.fdr
##  [1] 0.0732000 0.3275455 0.2310000 0.7774000 0.3142500 0.0202500 0.0060000
##  [8] 0.0710000 0.6977308 0.7774000 0.4578750 0.1072500 0.3128333 0.0732000
## [15] 0.2838750
# 0.0732000 0.3275455 0.2310000 0.7774000 0.3142500 0.0202500 0.0060000 0.0710000
# 0.6977308 0.7774000 0.4578750 0.1072500 0.3128333 0.0732000 0.2838750

Traditional Chao richness estimates

######## otu table with original reads number
# vegan::specpool() gives extrapolated richness per habitat: Chao, first- and
# second-order jackknife, and bootstrap estimators, each with a standard error.
# n is the number of sites per habitat.
(pool1 <- specpool(communityB, habitatN$Habitat))
##    Species     chao  chao.se     jack1 jack1.se    jack2     boot
## BB      83 185.9796 39.89775 132.71429 20.90210 165.8095 104.1117
## CL     194 336.1658 37.44452 295.73333 32.32777 358.8143 238.0445
## EC      64 115.4592 22.33722  99.14286 16.06619 120.0952  79.3403
## JC      85 209.6154 48.52236 139.00000 23.08246 177.7556 107.5053
## MF     119 405.5079 98.01090 203.44444 33.92075 267.8056 153.4993
## NF     210 507.3394 72.71267 346.61538 48.44615 445.4744 266.7256
##      boot.se  n
## BB  8.989364  7
## CL 15.679575 15
## EC  7.646678  7
## JC 10.816630 10
## MF 14.358124  9
## NF 22.180423 13
Species chao chao.se jack1 jack1.se jack2 boot boot.se n BB 83 185.9796 39.89775 132.71429 20.90210 165.8095 104.1117 8.989364 7 CL 194 336.1658 37.44452 295.73333 32.32777 358.8143 238.0445 15.679575 15 EC 64 115.4592 22.33722 99.14286 16.06619 120.0952 79.3403 7.646678 7 JC 85 209.6154 48.52236 139.00000 23.08246 177.7556 107.5053 10.816630 10 MF 119 405.5079 98.01090 203.44444 33.92075 267.8056 153.4993 14.358124 9 NF 210 507.3394 72.71267 346.61538 48.44615 445.4744 266.7256 22.180423 13

# Chao estimates and their SEs transcribed from the specpool table above.
# NOTE the order here is BB, EC, JC, MF, CL, NF — it matches the barplot
# names.arg below, not the row order of pool1.
es_value <- c(185.9796, 115.4592, 209.6154, 405.5079, 336.1658, 507.3394)
se <- c(39.89775, 22.33722, 48.52236, 98.01090, 37.44452, 72.71267)

# Draw vertical error bars on an existing plot (e.g. at barplot midpoints).
#
# x      : horizontal positions (e.g. bar centres returned by barplot())
# y      : bar heights / point values
# upper  : distance from y to the top of the error bar
# lower  : distance from y to the bottom (defaults to `upper`, i.e. symmetric)
# length : width of the error-bar caps, passed on to arrows()
# ...    : further graphical parameters forwarded to arrows()
error.bar <- function(x, y, upper, lower = upper, length = 0.1, ...) {
  # Scalar length comparisons: use short-circuit || (not elementwise |) in `if`
  if (length(x) != length(y) || length(y) != length(lower) ||
      length(lower) != length(upper)) {
    stop("vectors must be same length")
  }
  # angle = 90 gives flat caps; code = 3 draws a cap at both ends
  arrows(x, y + upper, x, y - lower, angle = 90, code = 3, length = length, ...)
}

# Barplot of the Chao richness estimates with +/- 1 SE error bars.
par(mfrow=c(1,2))
bar_es <- barplot(es_value, names.arg = c("BB", "EC", "JC", "MF", "CL", "NF"), col = "lightblue", ylim = c(0, 600), border = NA, ylab = "Species richness estimates")
error.bar(bar_es,es_value, se)
par(mfrow=c(1,1))

############ This function (t.test2) will calculate Welch's test
# Two-sample t-test computed from summary statistics instead of raw data.
#
# m1, m2 : sample means
# s1, s2 : sample standard deviations
# n1, n2 : sample sizes
# m0     : hypothesised difference in means (default 0)
# equal.variance : TRUE  -> pooled-SD test with n1 + n2 - 2 df
#                  FALSE -> Welch's test with Satterthwaite df (default)
#
# Returns a named numeric vector: difference of means, standard error,
# t statistic, degrees of freedom (rounded to 1 dp) and two-sided p-value.
t.test2 <- function(m1, m2, s1, s2, n1, n2, m0=0, equal.variance=FALSE)
{
  v1 <- s1^2 / n1
  v2 <- s2^2 / n2
  if (equal.variance) {
    # pooled standard deviation, scaled by the sample sizes
    pooled <- ((n1 - 1) * s1^2 + (n2 - 1) * s2^2) / (n1 + n2 - 2)
    std_err <- sqrt((1 / n1 + 1 / n2) * pooled)
    dof <- n1 + n2 - 2
  } else {
    std_err <- sqrt(v1 + v2)
    # Welch-Satterthwaite approximation to the degrees of freedom
    dof <- (v1 + v2)^2 / (v1^2 / (n1 - 1) + v2^2 / (n2 - 1))
  }
  t_stat <- (m1 - m2 - m0) / std_err
  # two-sided p-value uses the unrounded df; only the reported df is rounded
  out <- c(m1 - m2, std_err, t_stat, round(dof, 1), 2 * pt(-abs(t_stat), dof))
  names(out) <- c("Difference of means", "Std Error", "t", "df", "p-value")
  out
}

# Pairwise Welch tests on the Chao richness estimates using summary statistics:
# pool1[row, 2] = chao estimate, pool1[row, 3] = chao.se, pool1[row, 9] = n.
# chao.se * sqrt(n) converts the standard error into the SD that t.test2
# expects for its s1/s2 arguments.
pool1[1, 9]
## [1] 7
# t test for BB and CL
t.test2(pool1[1, 2], pool1[2, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[2, 3]*sqrt(pool1[2, 9]), pool1[1, 9], pool1[2, 9])
## Difference of means           Std Error                   t 
##       -150.18622013         54.71674629         -2.74479442 
##                  df             p-value 
##         15.90000000          0.01443101
# t test for BB and EC
t.test2(pool1[1, 2], pool1[3, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[3, 3]*sqrt(pool1[3, 9]), pool1[1, 9], pool1[3, 9])
## Difference of means           Std Error                   t 
##          70.5204082          45.7250681           1.5422702 
##                  df             p-value 
##           9.4000000           0.1558858
# t test for BB and JC
t.test2(pool1[1, 2], pool1[4, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[4, 3]*sqrt(pool1[4, 9]), pool1[1, 9], pool1[4, 9])
## Difference of means           Std Error                   t 
##         -23.6357928          62.8191847          -0.3762512 
##                  df             p-value 
##          15.0000000           0.7119989
# t test for BB and MF
t.test2(pool1[1, 2], pool1[5, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[5, 3]*sqrt(pool1[5, 9]), pool1[1, 9], pool1[5, 9])
## Difference of means           Std Error                   t 
##       -219.52834467        105.82044772         -2.07453615 
##                  df             p-value 
##         10.50000000          0.06350765
# t test for BB and NF
t.test2(pool1[1, 2], pool1[6, 2], pool1[1, 3]*sqrt(pool1[1, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[1, 9], pool1[6, 9])
## Difference of means           Std Error                   t 
##       -321.35977468         82.93951227         -3.87462822 
##                  df             p-value 
##         17.20000000          0.00119455
# t test for CL and EC
t.test2(pool1[2, 2], pool1[3, 2], pool1[2, 3]*sqrt(pool1[2, 9]), pool1[3, 3]*sqrt(pool1[3, 9]), pool1[2, 9], pool1[3, 9])
## Difference of means           Std Error                   t 
##        2.207066e+02        4.360096e+01        5.061967e+00 
##                  df             p-value 
##        1.990000e+01        6.078428e-05
# t test for CL and JC
t.test2(pool1[2, 2], pool1[4, 2], pool1[2, 3]*sqrt(pool1[2, 9]), pool1[4, 3]*sqrt(pool1[4, 9]), pool1[2, 9], pool1[4, 9])
## Difference of means           Std Error                   t 
##        126.55042735         61.29038815          2.06476792 
##                  df             p-value 
##         18.70000000          0.05312766
# t test for CL and MF
t.test2(pool1[2, 2], pool1[5, 2], pool1[2, 3]*sqrt(pool1[2, 9]), pool1[5, 3]*sqrt(pool1[5, 9]), pool1[2, 9], pool1[5, 9])
## Difference of means           Std Error                   t 
##         -69.3421245         104.9201070          -0.6609041 
##                  df             p-value 
##          10.4000000           0.5230721
# t test for CL and NF
t.test2(pool1[2, 2], pool1[6, 2], pool1[2, 3]*sqrt(pool1[2, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[2, 9], pool1[6, 9])
## Difference of means           Std Error                   t 
##       -171.17355455         81.78767880         -2.09290149 
##                  df             p-value 
##         18.10000000          0.05069616
# t test for EC and JC
t.test2(pool1[3, 2], pool1[4, 2], pool1[3, 3]*sqrt(pool1[3, 9]), pool1[4, 3]*sqrt(pool1[4, 9]), pool1[3, 9], pool1[4, 9])
## Difference of means           Std Error                   t 
##         -94.1562009          53.4169562          -1.7626650 
##                  df             p-value 
##          12.4000000           0.1025966
# t test for EC and MF
t.test2(pool1[3, 2], pool1[5, 2], pool1[3, 3]*sqrt(pool1[3, 9]), pool1[5, 3]*sqrt(pool1[5, 9]), pool1[3, 9], pool1[5, 9])
## Difference of means           Std Error                   t 
##        -290.0487528         100.5240687          -2.8853662 
##                  df             p-value 
##           8.8000000           0.0183903
# t test for EC and NF
t.test2(pool1[3, 2], pool1[6, 2], pool1[3, 3]*sqrt(pool1[3, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[3, 9], pool1[6, 9])
## Difference of means           Std Error                   t 
##       -3.918802e+02        7.606631e+01       -5.151823e+00 
##                  df             p-value 
##        1.410000e+01        1.430669e-04
# t test for JC and MF
t.test2(pool1[4, 2], pool1[5, 2], pool1[4, 3]*sqrt(pool1[4, 9]), pool1[5, 3]*sqrt(pool1[5, 9]), pool1[4, 9], pool1[5, 9])
## Difference of means           Std Error                   t 
##       -195.89255189        109.36432924         -1.79119237 
##                  df             p-value 
##         11.80000000          0.09898252
# t test for JC and NF
t.test2(pool1[4, 2], pool1[6, 2], pool1[4, 3]*sqrt(pool1[4, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[4, 9], pool1[6, 9])
## Difference of means           Std Error                   t 
##       -2.977240e+02        8.741597e+01       -3.405831e+00 
##                  df             p-value 
##        1.980000e+01        2.830142e-03
# t test for MF and NF
t.test2(pool1[5, 2], pool1[6, 2], pool1[5, 3]*sqrt(pool1[5, 9]), pool1[6, 3]*sqrt(pool1[6, 9]), pool1[5, 9], pool1[6, 9])
## Difference of means           Std Error                   t 
##        -101.8314300         122.0379828          -0.8344241 
##                  df             p-value 
##          16.0000000           0.4163281
# BB_CL, BB_EC, BB_JC, BB_MF, BB_NF, CL_EC, CL_JC, CL_MF, CL_NF, EC_JC, EC_MF, EC_NF, JC_MF, JC_NF, MF_NF
# FDR (Benjamini-Hochberg) correction of the 15 pairwise t.test2 p-values on the
# Chao richness estimates, transcribed from the output above.
# NOTE(review): several transcribed values disagree with the printed t.test2
# p-values (e.g. 0.1530 vs 0.1559 for BB_EC, 0.7236 vs 0.7120 for BB_JC,
# 0.4953 vs 0.5231 for CL_MF) — verify the transcription before relying on
# the adjusted values below.
p_values <- c(0.0144, 0.1530, 0.7236, 0.0613, 0.0012, 0.0001, 0.0515, 0.4953, 0.0519, 0.1041, 0.0182, 0.0001, 0.0933, 0.0028, 0.4543)
# p.adjust()'s `n` argument already defaults to length(p), so passing it is redundant
p_values.corr.fdr <- p.adjust(p_values, method = "fdr")
p_values.corr.fdr
##  [1] 0.0432000 0.1912500 0.7236000 0.1021667 0.0060000 0.0007500 0.0973125
##  [8] 0.5306786 0.0973125 0.1419545 0.0455000 0.0007500 0.1399500 0.0105000
## [15] 0.5241923
# 0.0432000 0.1912500 0.7236000 0.1021667 0.0060000 0.0007500 0.0973125 0.5306786
# 0.0973125 0.1419545 0.0455000 0.0007500 0.1399500 0.0105000 0.5241923

Interpolation and extrapolation of Hill numbers

# http://chao.stat.nthu.edu.tw/wordpress/wp-content/uploads/software/iNEXT_UserGuide.pdf

cname <- c("BB","CL","EC","JC","MF","NF")

# Per-habitat incidence frequencies: colSums of the binary site x OTU matrices
# give, for each OTU, the number of sites in that habitat where it occurs.
comm4inext_abun <- matrix(c(colSums(BB), colSums(CL), colSums(EC), colSums(JC), colSums(MF), colSums(NF)), ncol = 6)

colnames(comm4inext_abun) <- cname
colnameBB <- colnames(BB)
rownames(comm4inext_abun) <- colnameBB

# iNEXT's "incidence_freq" format requires the first row to be the number of
# sampling units (sites) per habitat, followed by the incidence frequencies.
comm4inext <- rbind(c(nrow(BB), nrow(CL), nrow(EC), nrow(JC), nrow(MF), nrow(NF)), comm4inext_abun) #
#comm4inext

confnum=0.95 # set confidence here
outcomm0 <- iNEXT(comm4inext, q=0, conf=confnum, datatype="incidence_freq")
# Hill numbers (q):  0 = sp richness, 1 = Shannon, 2 = inverse Simpson
outcomm0$DataInfo
##   site  T   U S.obs     SC  Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10
## 1   BB  7 132    83 0.5933  58 14  3  3  5  0  0  0  0   0
## 2   CL 15 408   194 0.7458 109 39 19  9  1 11  2  0  2   0
## 3   EC  7 102    64 0.6391  41 14  3  6  0  0  0  0  0   0
## 4   JC 10 134    85 0.5728  60 13  6  3  1  1  1  0  0   0
## 5   MF  9 161   119 0.4309  95 14  4  5  0  1  0  0  0   0
## 6   NF 13 322   210 0.5573 148 34 15  8  3  0  2  0  0   0
ChaoRichness(comm4inext, datatype="incidence_freq") # same as specpool results, so i trust that we have done this correctly
##    Observed Estimator Est_s.e. 95% Lower 95% Upper
## BB       83   185.980   39.898   132.480   297.326
## CL      194   336.166   37.445   279.575   430.179
## EC       64   115.459   22.337    86.793   180.178
## JC       85   209.615   48.522   144.670   345.246
## MF      119   405.508   98.011   268.269   668.926
## NF      210   507.339   72.713   395.401   686.863
ChaoShannon(comm4inext, datatype="incidence_freq")
##    Observed Estimator Est_s.e 95% Lower 95% Upper
## BB    4.230     4.899   0.134     4.637     5.162
## CL    4.974     5.397   0.065     5.270     5.525
## EC    4.012     4.577   0.143     4.297     4.856
## JC    4.250     4.987   0.154     4.684     5.290
## MF    4.640     5.702   0.175     5.359     6.046
## NF    5.177     5.938   0.098     5.746     6.130
# Rarefaction/extrapolation for Hill numbers q = 0, 1, 2 at once.
outI <- iNEXT(comm4inext, q=c(0,1,2), conf=confnum, datatype="incidence_freq")
# Sample‐size‐based R/E curves, separating by "site"
ggiNEXT(outI, type=1, facet.var="site") +theme_bw(base_size = 18)

# Sample‐size‐based R/E curves, separating by "order"
ggiNEXT(outI, type=1, facet.var="order")+theme_bw(base_size = 18)

# Phylogenetic diversity version of the rarefaction/extrapolation analysis.
commPD <- communityB
#remove 3 OTUs with very long branches
commPD$Cluster418132 <- NULL
commPD$Cluster636975 <- NULL
commPD$Cluster549885 <- NULL

# Re-split by habitat after dropping the three long-branch OTUs
# (this overwrites the BB..NF objects created for the alpha-diversity section).
BB <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("BB"))
CL <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("CL"))
EC <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("EC"))
JC <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("JC"))
MF <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("MF"))
NF <- commPD %>% dplyr::filter(habitatN$Habitat %in% c("NF"))
comm4inextPD <- matrix(c(colSums(BB), colSums(CL), colSums(EC), colSums(JC), colSums(MF), colSums(NF)), ncol = 6)

colnames(comm4inextPD) <- cname
colnameBB <- colnames(BB)
rownames(comm4inextPD) <- colnameBB

# Read the ML tree (newick string in column V1) and convert to the phylog
# format that iNextPD and table.phylog expect.
MLtree.tre <- read.table("./data/2014MBC_535otu-2collembola_align533.newick")
ML.tre <- ade4::newick2phylog(MLtree.tre$V1)
ML.lab <- rownames(comm4inextPD)

#rownames(comm4inext_abun)
# Per-habitat site metadata, used to label the rows of each habitat matrix.
BBnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("BB"))
CLnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("CL"))
ECnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("EC"))
JCnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("JC"))
MFnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("MF"))
NFnames <- habitatN %>% dplyr::filter(habitatN$Habitat %in% c("NF"))

rownames(BB) <- BBnames$Site
rownames(CL) <- CLnames$Site
rownames(EC) <- ECnames$Site
rownames(JC) <- JCnames$Site
rownames(MF) <- MFnames$Site
rownames(NF) <- NFnames$Site

BBnames$Site
## [1] "BB04" "BB05" "BB06" "BB07" "BB08" "BB09" "BB10"
#commB <- list(BB.Site = t(BB), CL.Site = t(CL), EC.Site = t(EC), JC.Site = t(JC), MF.Site = t(MF), NF.Site = t(NF))
# iNextPD "incidence_raw" wants a list of OTU x site matrices, hence t().
commB <- list(BB = t(BB), CL = t(CL), EC = t(EC), JC = t(JC), MF = t(MF), NF = t(NF))


out <- iNextPD(commB, ML.lab, ML.tre, q=c(0, 1, 2), datatype="incidence_raw", endpoint = 30, se = TRUE)
# Sample‐size‐based R/E curves, separating by "site""
ggiNEXT(out, type=1, facet.var="site") +theme_bw(base_size = 18)

# Sample‐size‐based R/E curves, separating by "order"
ggiNEXT(out, type=1, facet.var="order")+theme_bw(base_size = 18)

# display black‐white theme
ggiNEXT(out, type=1, facet.var="order", grey=TRUE)

table.phylog(comm4inextPD, ML.tre, csize=2, f.phylog=0.7)

Beta diversity turnover versus nestedness

Run tabasco before removing zerotons and singletons

community.jmds <- metaMDS(communityB, distance = "jaccard", trymax = 40, binary=TRUE)
## Run 0 stress 0.1937798 
## Run 1 stress 0.1993032 
## Run 2 stress 0.2046784 
## Run 3 stress 0.2039088 
## Run 4 stress 0.1938151 
## ... Procrustes: rmse 0.04310288  max resid 0.1929209 
## Run 5 stress 0.1941205 
## ... Procrustes: rmse 0.05150645  max resid 0.2706739 
## Run 6 stress 0.1951905 
## Run 7 stress 0.2050531 
## Run 8 stress 0.2088771 
## Run 9 stress 0.199114 
## Run 10 stress 0.1928347 
## ... New best solution
## ... Procrustes: rmse 0.03670751  max resid 0.1093344 
## Run 11 stress 0.1966296 
## Run 12 stress 0.1918593 
## ... New best solution
## ... Procrustes: rmse 0.03193756  max resid 0.1400837 
## Run 13 stress 0.1965023 
## Run 14 stress 0.1928589 
## Run 15 stress 0.1937054 
## Run 16 stress 0.203305 
## Run 17 stress 0.1979156 
## Run 18 stress 0.1927809 
## Run 19 stress 0.1935329 
## Run 20 stress 0.1911866 
## ... New best solution
## ... Procrustes: rmse 0.02706889  max resid 0.1126668 
## Run 21 stress 0.19505 
## Run 22 stress 0.1932218 
## Run 23 stress 0.1932825 
## Run 24 stress 0.2052633 
## Run 25 stress 0.1980282 
## Run 26 stress 0.1931746 
## Run 27 stress 0.1981246 
## Run 28 stress 0.1987869 
## Run 29 stress 0.2031158 
## Run 30 stress 0.1959541 
## Run 31 stress 0.1921867 
## Run 32 stress 0.198956 
## Run 33 stress 0.1920951 
## Run 34 stress 0.2008074 
## Run 35 stress 0.19739 
## Run 36 stress 0.2005727 
## Run 37 stress 0.1995965 
## Run 38 stress 0.2034736 
## Run 39 stress 0.2009664 
## Run 40 stress 0.2056193 
## *** No convergence -- monoMDS stopping criteria:
##      1: no. of iterations >= maxit
##     39: stress ratio > sratmax
community.jmds <- metaMDS(communityB, distance = "jaccard", binary = TRUE, previous.best = community.jmds)  # doesn't converge well, with final stress > 0.20
## Starting from 2-dimensional configuration
## Run 0 stress 0.1911866 
## Run 1 stress 0.1913813 
## ... Procrustes: rmse 0.02121749  max resid 0.08645838 
## Run 2 stress 0.1926701 
## Run 3 stress 0.1962628 
## Run 4 stress 0.1914205 
## ... Procrustes: rmse 0.009650509  max resid 0.06445552 
## Run 5 stress 0.1927342 
## Run 6 stress 0.1944217 
## Run 7 stress 0.2033203 
## Run 8 stress 0.2119161 
## Run 9 stress 0.1914252 
## ... Procrustes: rmse 0.009888332  max resid 0.06567271 
## Run 10 stress 0.1999456 
## Run 11 stress 0.1918427 
## Run 12 stress 0.2065167 
## Run 13 stress 0.1921984 
## Run 14 stress 0.1920911 
## Run 15 stress 0.1916466 
## ... Procrustes: rmse 0.01553547  max resid 0.06001273 
## Run 16 stress 0.2049019 
## Run 17 stress 0.1928554 
## Run 18 stress 0.1965603 
## Run 19 stress 0.1996184 
## Run 20 stress 0.2064503 
## *** No convergence -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
# Shepard diagram: ordination distances vs. observed Jaccard dissimilarities.
stressplot(community.jmds)

# Heatmap of the community table ordered by the NMDS.
# NOTE(review): this plots `community`, not `communityB` (the matrix that was
# ordinated above) -- presumably the abundance table defined earlier in the
# file; confirm the object name is intentional.
tabasco(community, use = community.jmds, labCol = habitatN$Habitat, col = brewer.pal(3, "Oranges"))

Betapart analysis before removing zerotons and singletons

# ---- betapart: each habitat paired with Natural forest (NF) ----
# Attach site metadata, then drop the metadata columns except Site,
# leaving Site + the OTU presence/absence columns.
communityBbetapart <- bind_cols(habitatN, communityB) 
communityBbetapart <- communityBbetapart %>% dplyr::select(-c(Habitat:sprichness))

# Rows for a pair of habitats, with Site moved to rownames so that only OTU
# columns remain for betapart. Relies on habitatN being row-aligned with
# communityBbetapart -- the same assumption the original repeated filters made.
habitat_pair <- function(habs) {
  communityBbetapart %>%
    dplyr::filter(habitatN$Habitat %in% habs) %>%
    column_to_rownames(var = "Site")
}

JCNF <- habitat_pair(c("JC", "NF"))
BBNF <- habitat_pair(c("BB", "NF"))
CLNF <- habitat_pair(c("CL", "NF"))
ECNF <- habitat_pair(c("EC", "NF"))
MFNF <- habitat_pair(c("MF", "NF"))

# Multi-site beta diversity (Jaccard family): total dissimilarity (beta.JAC)
# partitioned into turnover (beta.JTU) and nestedness (beta.JNE) components.
JCNF.multi.dist <- beta.multi(JCNF, index.family="jac")
BBNF.multi.dist <- beta.multi(BBNF, index.family="jac")
CLNF.multi.dist <- beta.multi(CLNF, index.family="jac")
ECNF.multi.dist <- beta.multi(ECNF, index.family="jac")
MFNF.multi.dist <- beta.multi(MFNF, index.family="jac")

# Collect all pairwise-with-NF partitions for inspection.
multi.all <- list(JCNF = JCNF.multi.dist, BBNF = BBNF.multi.dist, CLNF = CLNF.multi.dist, ECNF =  ECNF.multi.dist, MFNF = MFNF.multi.dist)


# Pairwise Jaccard dissimilarities across all sites, partitioned into
# turnover (beta.jtu), nestedness (beta.jne), and total (beta.jac) matrices.
ALL.dist <- communityBbetapart %>% column_to_rownames(var="Site") %>% beta.pair(index.family="jac")
# NMDS on the nestedness-only component.
ALL.dist.subset <- ALL.dist[["beta.jne"]]
ALL.dist.jne.jmds <- metaMDS(ALL.dist.subset)
## Run 0 stress 0.2878738 
## Run 1 stress 0.3102284 
## Run 2 stress 0.302938 
## Run 3 stress 0.2986731 
## Run 4 stress 0.2978615 
## Run 5 stress 0.3063189 
## Run 6 stress 0.3012631 
## Run 7 stress 0.2994175 
## Run 8 stress 0.2990311 
## Run 9 stress 0.3016207 
## Run 10 stress 0.301279 
## Run 11 stress 0.3037666 
## Run 12 stress 0.30188 
## Run 13 stress 0.3012636 
## Run 14 stress 0.298905 
## Run 15 stress 0.3047443 
## Run 16 stress 0.3023006 
## Run 17 stress 0.2977708 
## Run 18 stress 0.2984843 
## Run 19 stress 0.3031377 
## Run 20 stress 0.3051306 
## *** No convergence -- monoMDS stopping criteria:
##      1: no. of iterations >= maxit
##     19: stress ratio > sratmax
ALL.dist.jne.jmds <- metaMDS(ALL.dist.subset, previous.best = ALL.dist.jne.jmds)
## Starting from 2-dimensional configuration
## Run 0 stress 0.2878738 
## Run 1 stress 0.2998056 
## Run 2 stress 0.3002205 
## Run 3 stress 0.3079505 
## Run 4 stress 0.3024952 
## Run 5 stress 0.3072661 
## Run 6 stress 0.3016174 
## Run 7 stress 0.2989858 
## Run 8 stress 0.2990639 
## Run 9 stress 0.2981019 
## Run 10 stress 0.3020738 
## Run 11 stress 0.3063326 
## Run 12 stress 0.3064975 
## Run 13 stress 0.3044563 
## Run 14 stress 0.3052725 
## Run 15 stress 0.2992804 
## Run 16 stress 0.3039105 
## Run 17 stress 0.3057486 
## Run 18 stress 0.3074614 
## Run 19 stress 0.3015778 
## Run 20 stress 0.2995792 
## *** No convergence -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
# Shepard diagram for the nestedness-only NMDS (note the high ~0.29 stress).
stressplot(ALL.dist.jne.jmds)

# NMDS on the turnover-only component (reuses the ALL.dist.subset name).
ALL.dist.subset <- ALL.dist[["beta.jtu"]]
ALL.dist.jtu.jmds <- metaMDS(ALL.dist.subset)
## Run 0 stress 0.1964351 
## Run 1 stress 0.195639 
## ... New best solution
## ... Procrustes: rmse 0.04923861  max resid 0.1820483 
## Run 2 stress 0.2031976 
## Run 3 stress 0.1946288 
## ... New best solution
## ... Procrustes: rmse 0.0267172  max resid 0.1613364 
## Run 4 stress 0.2044537 
## Run 5 stress 0.2035156 
## Run 6 stress 0.1995786 
## Run 7 stress 0.2016852 
## Run 8 stress 0.2062343 
## Run 9 stress 0.2025288 
## Run 10 stress 0.1952773 
## Run 11 stress 0.1983759 
## Run 12 stress 0.198087 
## Run 13 stress 0.1979433 
## Run 14 stress 0.2045171 
## Run 15 stress 0.195977 
## Run 16 stress 0.2054783 
## Run 17 stress 0.1980396 
## Run 18 stress 0.2003249 
## Run 19 stress 0.2061029 
## Run 20 stress 0.1970336 
## *** No convergence -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
ALL.dist.jtu.jmds <- metaMDS(ALL.dist.subset, previous.best = ALL.dist.jne.jmds)
## Starting from 2-dimensional configuration
## Stress differs from 'previous.best': reset tries
## Run 0 stress 0.3873318 
## Run 1 stress 0.1943429 
## ... New best solution
## ... Procrustes: rmse 0.1279321  max resid 0.1960386 
## Run 2 stress 0.1945287 
## ... Procrustes: rmse 0.01745824  max resid 0.05851616 
## Run 3 stress 0.2149575 
## Run 4 stress 0.2076494 
## Run 5 stress 0.2010388 
## Run 6 stress 0.1965874 
## Run 7 stress 0.2082553 
## Run 8 stress 0.1943545 
## ... Procrustes: rmse 0.01666206  max resid 0.07883044 
## Run 9 stress 0.2026224 
## Run 10 stress 0.1947925 
## ... Procrustes: rmse 0.01806205  max resid 0.05953726 
## Run 11 stress 0.1990639 
## Run 12 stress 0.1951838 
## Run 13 stress 0.2033034 
## Run 14 stress 0.1974786 
## Run 15 stress 0.1951864 
## Run 16 stress 0.2038948 
## Run 17 stress 0.1959615 
## Run 18 stress 0.2126981 
## Run 19 stress 0.2073787 
## Run 20 stress 0.1944445 
## ... Procrustes: rmse 0.01415498  max resid 0.06369613 
## *** No convergence -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
# Shepard diagram for the turnover-only NMDS.
stressplot(ALL.dist.jtu.jmds)

# , ylim = c(-0.5, 0.5), xlim = c(-0.4, 0.4)
# 2x2 panel figure: total, turnover-only, and nestedness-only ordinations.
par(mfrow=c(2,2))
colvec <- brewer.pal(5, "Set1")
# First panel: fitted species-richness surface over the total-beta NMDS.
# FIX: the original passed `ylim = c(-0.5, 0.5)` as a third argument to
# with(), where with.default() silently ignores it; that dead argument is
# removed here (no change in behaviour).
with(habitatN, ordisurf(community.jmds, sprichness, main="All beta diversity", cex=0.5, col = "white"))
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## y ~ s(x1, x2, k = 10, bs = "tp", fx = FALSE)
## 
## Estimated degrees of freedom:
## 5.17  total = 6.17 
## 
## REML score: 221.1028
# plot(community.jmds, main = "All beta diversity", ylim = c(-0.4, 0.4))
# Overlay per-habitat SE polygons (95% conf) on an ordination. The three
# monoculture plantations (BB, EC, JC) share one colour, clear-cut (CL) gets
# another, and mosaic/natural forest (MF, NF) are dark blue; the same palette
# is applied to all three panels.
add_habitat_spiders <- function(ord) {
  habitat_cols <- c(BB = as.character(colvec[3]),
                    CL = as.character(colvec[1]),
                    EC = as.character(colvec[3]),
                    JC = as.character(colvec[3]),
                    MF = "darkblue",
                    NF = "darkblue")
  for (hab in names(habitat_cols)) {
    with(habitatN,
         ordispider(ord, Habitat, cex=.5, draw="polygon",
                    col=habitat_cols[[hab]], alpha=20, kind="se", conf=0.95,
                    label=TRUE, show.groups=hab))
  }
}

# Panel 1 (drawn by ordisurf above): total beta diversity.
add_habitat_spiders(community.jmds)

# Panel 2: turnover component only.
plot(ALL.dist.jtu.jmds, main = "Turnover beta diversity only", ylim = c(-0.5, 0.5))
add_habitat_spiders(ALL.dist.jtu.jmds)

# Panel 3: nestedness component only.
plot(ALL.dist.jne.jmds, main = "Nestedness beta diversity only", ylim = c(-0.5, 0.5))
add_habitat_spiders(ALL.dist.jne.jmds)

par(mfrow=c(1,1))

Total beta diversity NMDS is correlated with turnover-only beta diversity

protest(community.jmds, ALL.dist.jtu.jmds)
## 
## Call:
## protest(X = community.jmds, Y = ALL.dist.jtu.jmds) 
## 
## Procrustes Sum of Squares (m12 squared):        0.07381 
## Correlation in a symmetric Procrustes rotation: 0.9624 
## Significance:  0.001 
## 
## Permutation: free
## Number of permutations: 999

Results pasted from an earlier run (the numbers differ slightly from the chunk output above because protest() uses random permutations):

Call: protest(X = community.jmds, Y = ALL.dist.jtu.jmds)

Procrustes Sum of Squares (m12 squared): 0.06903. Correlation in a symmetric Procrustes rotation: 0.9649. Significance: 0.001.

Permutation: free. Number of permutations: 999.

Total beta diversity NMDS is not correlated with nestedness-only beta diversity

protest(community.jmds, ALL.dist.jne.jmds)
## 
## Call:
## protest(X = community.jmds, Y = ALL.dist.jne.jmds) 
## 
## Procrustes Sum of Squares (m12 squared):        0.9937 
## Correlation in a symmetric Procrustes rotation: 0.07915 
## Significance:  0.909 
## 
## Permutation: free
## Number of permutations: 999

Results pasted from an earlier run (values again differ slightly from the chunk output above owing to the random permutations):

Call: protest(X = community.jmds, Y = ALL.dist.jne.jmds)

Procrustes Sum of Squares (m12 squared): 0.9941. Correlation in a symmetric Procrustes rotation: 0.07677. Significance: 0.902.

Permutation: free. Number of permutations: 999.

Beta diversity UNCONSTRAINED ordination

IMPORTANT

For NMDS, mvabund, and boral analyses, remove zerotons and singletons

# Keep only OTUs present in more than one sample: specnumber(MARGIN = 2)
# counts, per OTU column, the number of sites it occurs in.
otus_to_keep <- specnumber(communityB, MARGIN = 2) > 1
communityB <- communityB[, which(otus_to_keep)]
# 269 species remained

NMDS ordination

### do NMDS analysis to quickly see patterns ####
community.jmds <- metaMDS(communityB, distance = "jaccard", trymax = 40, binary=TRUE)
## Run 0 stress 0.1941036 
## Run 1 stress 0.1898452 
## ... New best solution
## ... Procrustes: rmse 0.05543086  max resid 0.2628178 
## Run 2 stress 0.2028075 
## Run 3 stress 0.1906199 
## Run 4 stress 0.1901219 
## ... Procrustes: rmse 0.03627274  max resid 0.1447797 
## Run 5 stress 0.1893944 
## ... New best solution
## ... Procrustes: rmse 0.02812124  max resid 0.09950703 
## Run 6 stress 0.2035494 
## Run 7 stress 0.1941584 
## Run 8 stress 0.1920113 
## Run 9 stress 0.1940623 
## Run 10 stress 0.1945931 
## Run 11 stress 0.1979348 
## Run 12 stress 0.2039438 
## Run 13 stress 0.1942227 
## Run 14 stress 0.1962865 
## Run 15 stress 0.2009856 
## Run 16 stress 0.1988058 
## Run 17 stress 0.2046388 
## Run 18 stress 0.1898469 
## ... Procrustes: rmse 0.04070436  max resid 0.1672788 
## Run 19 stress 0.2101562 
## Run 20 stress 0.2034526 
## Run 21 stress 0.1904331 
## Run 22 stress 0.1938473 
## Run 23 stress 0.1997294 
## Run 24 stress 0.1911799 
## Run 25 stress 0.1951652 
## Run 26 stress 0.2023139 
## Run 27 stress 0.1927948 
## Run 28 stress 0.1986546 
## Run 29 stress 0.1941592 
## Run 30 stress 0.1904473 
## Run 31 stress 0.1983602 
## Run 32 stress 0.1900627 
## Run 33 stress 0.1923577 
## Run 34 stress 0.2049473 
## Run 35 stress 0.1899515 
## Run 36 stress 0.1931936 
## Run 37 stress 0.2088232 
## Run 38 stress 0.2029848 
## Run 39 stress 0.2034659 
## Run 40 stress 0.2073307 
## *** No convergence -- monoMDS stopping criteria:
##     40: stress ratio > sratmax
community.jmds <- metaMDS(communityB, distance = "jaccard", binary = TRUE, previous.best = community.jmds)  # doesn't converge well, with final stress > 0.20
## Starting from 2-dimensional configuration
## Run 0 stress 0.1893944 
## Run 1 stress 0.1903028 
## Run 2 stress 0.1935929 
## Run 3 stress 0.1943752 
## Run 4 stress 0.1991359 
## Run 5 stress 0.1914793 
## Run 6 stress 0.1946816 
## Run 7 stress 0.2050226 
## Run 8 stress 0.194464 
## Run 9 stress 0.1899189 
## Run 10 stress 0.1894561 
## ... Procrustes: rmse 0.02454399  max resid 0.08716317 
## Run 11 stress 0.2007596 
## Run 12 stress 0.2015332 
## Run 13 stress 0.1934242 
## Run 14 stress 0.1931752 
## Run 15 stress 0.1913791 
## Run 16 stress 0.1925354 
## Run 17 stress 0.1947614 
## Run 18 stress 0.1905258 
## Run 19 stress 0.1906189 
## Run 20 stress 0.199274 
## *** No convergence -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
stressplot(community.jmds)

boral

Tested different error families (testing code is archived down below). Based on residuals, decided on family = “binomial,” which is good because the dataset is presence/absence. Using row.effect = “random” to get a composition-only analysis. Now rerun with many more iterations

This takes a long time to run, should run overnight

# set up MCMC parameters
# mcmc.control <- list(n.burnin = 300, n.iteration = 1000, n.thin = 30, seed = 123) # for debugging

# Production MCMC settings: 10k burn-in, 40k iterations, thin by 30
# (~1000 retained samples). This is the overnight run mentioned above.
mcmc.control4 <- list(n.burnin = 10000, n.iteration = 40000, n.thin = 30) 

# Set up priors
# Francis Hui suggests trying different priors to stabilise sampling for sparse matrices (such as we have).  This isn't as important as getting the mcmc.control parameters right
#set.prior <- list(type = c("normal","normal","normal","uniform"), hypparams = c(10, 10, 10, 30), ssvs.index = ssvsindex)  
# type = c("normal","normal","normal","uniform"), hypparams = c(10, 10, 10, 30) # boral default
# type = c("cauchy","cauchy","cauchy","uniform"), hypparams = c(2.5^2, 2.5^2, 2.5^2, 30) # Gelman proposed this
# type = c("normal","normal","normal","uniform"), hypparams = c(1, 1, 1, 30) ## People who developed the STAN package proposed this as one possibility. 


# set up model
# Latent-variable model: binomial family (presence/absence data), 2 latent
# variables, and a random row effect so the LVs capture composition only.
# NOTE(review): beep() is from the beepr package, which is not among the
# libraries attached in the setup chunk -- confirm it is loaded elsewhere.
commB.fit.b3.4.none <- boral(communityB, family = "binomial", num.lv = 2, row.eff = "random", mcmc.control = mcmc.control4, save.model = TRUE); beep(sound = 1)
## row.ids assumed to be a matrix with one column and elements 1,2,...nrow(y) i.e., a row-specific intercept
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2018e.1.0/
## zoneinfo/Asia/Shanghai'
## module glm loaded
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 16287
##    Unobserved stochastic nodes: 17271
##    Total graph size: 133646
## 
## Initializing model
# Persist the (overnight) fit so later analyses can reload it without
# re-running the MCMC.
saveRDS(commB.fit.b3.4.none, "boral_commB.fit.b3.4.RDS")
# commB.fit.b3.4.none <- readRDS("boral_commB.fit.b3.4.RDS") # to restore

summary(commB.fit.b3.4.none)
## $call
## boral.default(y = communityB, family = "binomial", num.lv = 2, 
##     row.eff = "random", save.model = TRUE, mcmc.control = mcmc.control4)
## 
## $coefficients
##                 coefficients
## cols              beta0 theta1 theta2
##   Cluster1003060 -0.061  1.885  0.000
##   Cluster1003587  0.735  1.522  1.423
##   Cluster1008496 -1.900 -5.033 -2.042
##   Cluster1036508 -0.047  0.165 -0.940
##   Cluster1036793  0.374  0.371 -0.960
##   Cluster1043256  0.187  1.502  1.184
##   Cluster104659  -1.599 -1.381  4.035
##   Cluster1048454 -0.989  0.659  3.049
##   Cluster105406   0.064 -0.649 -1.438
##   Cluster1058962 -1.015  0.029 -4.471
##   Cluster1082093  0.431  1.069  1.483
##   Cluster1094626 -1.380 -2.515  2.730
##   Cluster1111802 -1.071 -1.323  2.546
##   Cluster1111808 -2.013 -1.072 -4.954
##   Cluster1112567 -1.428  0.603 -3.935
##   Cluster1115775  0.166  1.279  1.260
##   Cluster1117056  0.205 -0.150  0.791
##   Cluster1118816 -0.362 -1.143  1.256
##   Cluster1120269 -0.152  1.950  1.203
##   Cluster1123272 -0.362  4.552  0.066
##   Cluster1143779 -0.081 -2.963 -1.791
##   Cluster114519   0.045  0.133 -1.452
##   Cluster1146018 -1.356 -2.201 -3.895
##   Cluster1146858 -0.165  3.069  0.259
##   Cluster1147021 -0.297 -2.032 -1.246
##   Cluster1147433  0.000  2.918  0.926
##   Cluster1151646 -0.431 -3.275 -1.399
##   Cluster1154095  0.323  1.376 -0.713
##   Cluster1160549  0.165 -2.361  0.024
##   Cluster116476  -2.734 -2.942  4.000
##   Cluster1183739 -0.130 -3.801  0.085
##   Cluster1183836  0.422  0.998 -1.893
##   Cluster1185877 -0.997  1.292 -5.130
##   Cluster1189880 -1.433  0.366 -3.602
##   Cluster1190248 -0.724 -4.689  0.809
##   Cluster119182  -0.698  1.412  3.439
##   Cluster1205684  0.359 -0.379 -1.213
##   Cluster1206358 -2.208  1.815 -4.829
##   Cluster1212246 -0.646  1.658 -2.522
##   Cluster1218901 -1.008 -1.093  2.694
##   Cluster1219908 -0.609 -0.264  2.778
##   Cluster1228370 -1.383 -4.248 -1.807
##   Cluster1230530 -1.688  4.845 -0.284
##   Cluster1236621 -1.212  4.119  1.003
##   Cluster1244964 -0.670 -2.060 -1.375
##   Cluster1248054  0.252 -0.057  0.503
##   Cluster1251081 -1.095 -1.363  2.553
##   Cluster1252116 -0.338 -0.418  2.378
##   Cluster1262671  0.910  0.451 -1.257
##   Cluster1263180 -1.167  4.409 -2.821
##   Cluster1264742  0.863 -0.891  1.839
##   Cluster1270869  0.374  1.666 -1.118
##   Cluster1271357 -1.459  2.180 -4.986
##   Cluster1322    -0.714 -6.581  0.041
##   Cluster141583  -0.002 -0.060 -1.233
##   Cluster144694  -0.334  0.958 -1.025
##   Cluster148932  -0.852  2.053 -3.167
##   Cluster159898  -0.318 -1.530  0.793
##   Cluster161999  -2.575 -0.003  4.710
##   Cluster162246  -0.701  2.799 -1.427
##   Cluster162949  -2.692 -2.387  4.103
##   Cluster168999  -0.665 -1.197 -1.917
##   Cluster169255  -0.979 -3.805 -2.020
##   Cluster170040  -0.460  0.673 -2.174
##   Cluster190091   0.213 -1.255  2.371
##   Cluster193554  -1.956 -0.386  4.298
##   Cluster193702  -0.282  3.060  1.263
##   Cluster199147  -1.620 -5.293 -4.644
##   Cluster207055  -0.301 -2.077 -1.182
##   Cluster209030  -0.880  4.158  0.961
##   Cluster21179   -0.053 -0.853 -1.193
##   Cluster214234  -0.001  1.607 -1.252
##   Cluster216019  -0.376  2.486  0.191
##   Cluster22396   -0.791  2.282 -2.491
##   Cluster230928  -0.114 -2.971  5.967
##   Cluster236674  -0.158 -0.551  0.191
##   Cluster238319  -0.244 -2.501  0.758
##   Cluster238661  -1.238 -4.727 -0.056
##   Cluster248444  -0.374 -1.145  1.340
##   Cluster248703  -0.194 -0.131 -0.478
##   Cluster248717   0.077 -1.495 -1.264
##   Cluster251185  -0.249 -1.294  1.002
##   Cluster255441   0.130 -0.506 -0.154
##   Cluster256405   1.421  2.134  0.046
##   Cluster258891  -1.975  2.307 -3.986
##   Cluster260416  -0.690 -1.566  2.553
##   Cluster264960  -1.335 -0.835  3.119
##   Cluster268911  -0.471  1.784  3.262
##   Cluster27114   -1.055  3.794  1.935
##   Cluster278879  -0.787  3.347 -0.550
##   Cluster279779   0.110 -0.103 -1.248
##   Cluster283921   1.513 -1.343 -2.090
##   Cluster292040  -0.295 -1.627 -0.437
##   Cluster293462  -0.343 -1.840  0.707
##   Cluster297322  -0.568 -2.599  0.513
##   Cluster297807  -0.643 -0.371  3.189
##   Cluster300625  -0.397  3.227 -0.514
##   Cluster304037  -0.984  4.695 -1.301
##   Cluster307655  -0.257  0.628 -0.884
##   Cluster311923  -0.003 -1.393  1.745
##   Cluster313551  -0.344  0.497 -2.877
##   Cluster317158  -2.026  2.193 -5.545
##   Cluster317705  -0.253 -1.266 -0.918
##   Cluster3190    -1.222  3.930  2.096
##   Cluster319598  -2.019 -4.231 -4.095
##   Cluster323177  -0.417  0.262 -2.155
##   Cluster328858   0.840  3.326  0.622
##   Cluster331968   0.567  0.068  0.059
##   Cluster337634  -1.685  4.739 -0.326
##   Cluster346275  -0.207  0.440 -0.942
##   Cluster346941   0.709 -1.667  2.071
##   Cluster34774   -0.395  0.551 -2.129
##   Cluster349803  -0.173 -1.059  0.466
##   Cluster356106   0.336  1.189  1.276
##   Cluster356886  -0.530  0.801 -1.640
##   Cluster363044  -2.313  5.354  1.586
##   Cluster366435  -0.503  2.985  1.262
##   Cluster374617  -0.200  0.557 -1.657
##   Cluster375655  -0.495 -1.689 -1.129
##   Cluster382546  -0.455 -1.515 -0.839
##   Cluster392350  -1.386  4.504  0.822
##   Cluster405715  -0.385 -1.332 -0.808
##   Cluster405735  -0.146 -1.639 -1.036
##   Cluster409890   0.010  1.246  3.409
##   Cluster411007  -0.084  1.629  1.255
##   Cluster413866  -0.530 -1.456  1.682
##   Cluster418132  -0.821 -4.570 -0.215
##   Cluster418592  -2.445 -1.359  3.963
##   Cluster421090  -0.800 -0.746 -3.071
##   Cluster422387  -1.165 -4.375 -0.259
##   Cluster425852  -0.730 -0.519 -2.370
##   Cluster426879  -0.381  0.675 -2.579
##   Cluster434789  -0.069  1.736  0.389
##   Cluster436377  -0.090  1.176  1.571
##   Cluster442017   0.259 -1.052  1.100
##   Cluster442199  -2.687 -2.868  4.011
##   Cluster446078  -0.374 -1.906  2.171
##   Cluster449561   0.862 -0.104  0.793
##   Cluster451245   0.647 -2.540  2.056
##   Cluster45771   -2.219  1.142 -6.062
##   Cluster458565  -0.472 -1.655  1.344
##   Cluster465625  -2.328  0.566  4.246
##   Cluster466805  -2.826 -0.227  6.347
##   Cluster472574   0.443 -3.139  0.310
##   Cluster474866  -0.480 -2.208  1.713
##   Cluster477205  -0.877  1.412 -2.266
##   Cluster47848   -0.211 -0.710 -0.885
##   Cluster487185   0.322 -0.755  0.861
##   Cluster4912    -0.680  2.889 -1.189
##   Cluster506593  -1.979  6.289 -0.208
##   Cluster507669  -2.474  1.809 -4.792
##   Cluster510789  -0.214 -2.075  1.869
##   Cluster511143  -1.172 -0.222 -4.791
##   Cluster511497  -1.199  3.925  2.175
##   Cluster512825  -0.475  0.571 -1.865
##   Cluster515093   0.162 -3.041 -2.761
##   Cluster515604   0.363 -0.305 -1.363
##   Cluster517616  -0.146 -2.120 -1.157
##   Cluster528954  -0.663 -2.779 -0.904
##   Cluster53598   -0.375  2.502  1.303
##   Cluster538054   0.136  0.408  1.460
##   Cluster549169  -0.346  1.971  2.598
##   Cluster549198  -2.544  0.809  5.321
##   Cluster549885  -3.064 -0.265  6.716
##   Cluster549932  -2.033  2.360 -4.071
##   Cluster550517  -0.258  2.339  0.715
##   Cluster551260  -0.282  2.365  0.760
##   Cluster554866   0.308 -3.394  0.334
##   Cluster560910   0.012  0.479 -1.207
##   Cluster562057  -0.594 -2.004  1.591
##   Cluster566878   0.668  0.115  1.805
##   Cluster56812   -1.973  6.254 -0.260
##   Cluster568976  -1.221 -0.086  4.117
##   Cluster571733  -0.034 -1.522  0.723
##   Cluster571940  -0.710 -1.420  2.834
##   Cluster582971  -3.053  0.047  6.537
##   Cluster588978  -1.992  2.225 -4.041
##   Cluster599965   0.100 -0.185 -0.832
##   Cluster604765   0.142  1.170 -1.519
##   Cluster605186  -1.907 -2.283 -4.359
##   Cluster608280   0.572  0.802  0.262
##   Cluster624167  -0.217 -0.393 -1.034
##   Cluster62672   -0.081 -0.094  0.248
##   Cluster628263  -0.060  1.716  0.530
##   Cluster629282  -0.002  0.367  0.006
##   Cluster633345  -2.888 -0.061  6.366
##   Cluster635168  -0.387  1.766 -0.797
##   Cluster636211  -0.024  0.326 -1.598
##   Cluster636975   2.153  0.868  0.590
##   Cluster637214  -2.705  0.470 -6.147
##   Cluster638434  -0.241  0.745 -2.146
##   Cluster64231   -0.033 -0.156 -1.209
##   Cluster648953  -0.860 -2.241 -2.959
##   Cluster657934  -0.130  2.084  0.301
##   Cluster662085  -2.304  5.506 -0.014
##   Cluster667421  -0.391 -0.667 -1.365
##   Cluster675458  -0.163 -0.066  0.472
##   Cluster677759  -0.360 -0.015 -2.707
##   Cluster680855  -0.712 -2.445  2.285
##   Cluster681238  -2.005  0.228  3.858
##   Cluster685794   0.604  0.272 -1.256
##   Cluster689312  -0.460 -1.613 -0.984
##   Cluster691418  -0.470 -1.734 -0.868
##   Cluster691869  -0.239  0.201 -0.713
##   Cluster695513  -0.441  1.702 -0.987
##   Cluster697682  -0.618  1.685 -1.857
##   Cluster714500  -0.318  1.451 -0.841
##   Cluster7237    -0.164 -1.982  1.770
##   Cluster731000  -2.665 -2.554  3.986
##   Cluster738928  -0.417  1.859 -0.846
##   Cluster742248  -1.079 -2.096  5.959
##   Cluster746906  -1.546  1.123 -3.643
##   Cluster750082   0.108 -0.109 -2.120
##   Cluster752397   0.115 -2.332  0.684
##   Cluster759071  -3.003  0.076  6.550
##   Cluster761967  -2.085  2.527 -4.089
##   Cluster762239  -1.762 -2.962 -4.301
##   Cluster774173  -0.222 -1.775 -1.650
##   Cluster779302  -3.154  0.096 -6.742
##   Cluster794054  -2.978 -4.022 -4.930
##   Cluster802315   0.429 -0.096 -0.117
##   Cluster815919  -1.672  6.319  0.634
##   Cluster816928  -0.237  1.052 -1.109
##   Cluster827049  -1.877  6.119 -0.227
##   Cluster827888  -3.094 -0.181  6.534
##   Cluster831241  -0.510 -1.642 -1.110
##   Cluster852624  -0.474  0.859 -1.410
##   Cluster852926  -1.765 -5.920 -1.762
##   Cluster866046  -0.179 -1.297 -1.998
##   Cluster866530   1.663 -0.316  1.043
##   Cluster868323  -0.740 -1.858  1.991
##   Cluster870147  -1.725  3.549  3.644
##   Cluster871445  -1.210 -1.118  2.941
##   Cluster872255  -1.135 -1.182  2.755
##   Cluster8795    -0.625 -2.822  0.608
##   Cluster881515  -0.467 -0.681 -1.689
##   Cluster885812  -0.682 -0.959  2.782
##   Cluster892866  -2.665 -1.269  4.167
##   Cluster893883  -0.437  0.503 -2.791
##   Cluster896261  -0.686  1.950  2.818
##   Cluster89960   -0.934  0.550 -2.760
##   Cluster899998  -1.201  0.645 -3.251
##   Cluster91278   -0.362 -3.417 -0.664
##   Cluster91284   -0.064 -0.437 -1.717
##   Cluster914193  -0.155  1.605 -0.907
##   Cluster932946  -0.405  0.685 -1.328
##   Cluster933654  -0.022  0.021  0.546
##   Cluster933936  -0.514 -0.771 -1.751
##   Cluster946116  -1.784 -5.195 -1.346
##   Cluster95070   -2.444  0.740  4.419
##   Cluster953387  -0.462 -1.346  3.449
##   Cluster955298  -1.242  2.989  3.287
##   Cluster956580  -0.530  1.306 -1.300
##   Cluster956910  -0.204 -1.294  0.616
##   Cluster957273  -2.138  5.399 -0.254
##   Cluster957758   0.332 -1.692 -1.553
##   Cluster96058   -0.712  3.482  1.358
##   Cluster964340  -0.458  1.689 -1.071
##   Cluster970338  -0.447 -1.801 -1.176
##   Cluster970757  -0.080 -1.838  0.025
##   Cluster975075  -0.038 -1.631 -1.435
##   Cluster975205  -0.331  0.262 -1.040
##   Cluster982015  -1.372 -3.560 -4.061
##   Cluster987637  -1.604 -2.367  3.056
##   Cluster991103  -0.037 -0.804 -1.295
##   Cluster995806  -0.722 -2.254  1.626
##   Cluster99619   -0.479 -1.620  2.062
## 
## $lvs
##     lv
## rows    lv1    lv2
##   1   0.039 -0.853
##   2   0.078 -0.940
##   3   0.210 -0.675
##   4  -0.293 -1.126
##   5   0.245 -0.806
##   6   0.529 -0.884
##   7   0.183 -0.680
##   8   0.367  0.413
##   9  -0.238  0.346
##   10 -0.199  0.259
##   11 -0.282  0.353
##   12 -0.207  0.306
##   13 -0.324  0.454
##   14 -0.148  0.596
##   15 -0.262  0.509
##   16 -0.225  0.615
##   17  0.019  1.181
##   18 -0.064  1.147
##   19 -0.308  1.257
##   20 -0.253  1.425
##   21 -0.565  1.272
##   22  0.050  1.305
##   23  0.469  0.177
##   24  0.215  0.089
##   25  0.311  0.253
##   26  1.205  0.051
##   27  0.668  0.085
##   28  0.812 -0.090
##   29  0.896 -0.066
##   30 -0.443 -0.345
##   31 -0.258 -0.345
##   32 -0.147 -0.212
##   33 -0.257 -0.120
##   34 -0.083 -0.330
##   35 -0.040 -0.226
##   36 -1.137 -0.698
##   37 -0.627 -0.462
##   38 -0.452 -0.103
##   39 -0.301 -0.330
##   40  0.112 -0.241
##   41  0.121 -0.301
##   42  0.070 -0.435
##   43  0.095 -0.440
##   44  0.167 -0.648
##   45 -0.018 -0.470
##   46  0.078 -0.169
##   47  0.001 -0.418
##   48 -0.119 -0.172
##   49  0.130 -0.346
##   50  0.146 -0.213
##   51 -0.166 -0.208
##   52  0.302 -0.205
##   53  0.220 -0.290
##   54  0.170 -0.399
##   55  0.071 -0.312
##   56 -0.287 -0.262
##   57 -0.204 -0.180
##   58 -0.177 -0.221
##   59 -0.274 -0.331
##   60  0.619  0.315
##   61  0.765  0.404
## 
## $row.coefficients
## $row.coefficients[[1]]
##      1      2      3      4      5      6      7      8      9     10 
## -3.002 -3.852 -2.339 -4.652 -3.675 -3.294 -2.448 -2.950 -1.478 -1.186 
##     11     12     13     14     15     16     17     18     19     20 
## -1.559 -1.487 -1.905 -2.602 -1.937 -2.177 -3.750 -3.959 -3.839 -4.352 
##     21     22     23     24     25     26     27     28     29     30 
## -4.520 -4.166 -2.885 -2.272 -2.172 -5.011 -2.516 -2.920 -3.676 -2.807 
##     31     32     33     34     35     36     37     38     39     40 
## -2.032 -2.452 -2.474 -2.509 -1.637 -4.669 -3.484 -2.010 -2.333 -1.881 
##     41     42     43     44     45     46     47     48     49     50 
## -1.934 -2.014 -1.946 -2.649 -1.863 -1.250 -2.146 -2.255 -2.471 -2.357 
##     51     52     53     54     55     56     57     58     59     60 
## -2.299 -1.582 -1.336 -1.327 -1.690 -1.436 -1.275 -1.383 -1.856 -2.771 
##     61 
## -3.637 
## 
## 
## $est
## [1] "median"
## 
## $calc.ics
## [1] FALSE
## 
## $trial.size
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [71] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [106] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [141] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [176] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [211] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [246] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 
## $num.ord.levels
## [1] 0
## 
## $prior.control
## $prior.control$ssvs.index
## [1] -1
## 
## 
## attr(,"class")
## [1] "summary.boral"
# Residual diagnostic plots for the boral fit, 2x2 grid; these informed the
# choice of the binomial family (see the prose above).
par(mfrow = c(2,2))
plot(commB.fit.b3.4.none) ## Plots used in residual analysis, 
## NULL
## Warning in qnorm(get_mus[, j] + 1e-05): NaNs produced

par(mfrow = c(1,1))

# Model-based unconstrained ordination: plot the two latent variables,
# coloured by habitat, and keep the scaled scores for later use.
commB.fit.b3.4.none.ord <- lvsplot(commB.fit.b3.4.none, biplot=FALSE, col = as.numeric(habitatN$Habitat), return.vals = TRUE)

# Residual correlations among OTUs after accounting for the latent variables.
# NOTE(review): beep() again requires the beepr package (not attached above).
res.cors <- get.residual.cor(commB.fit.b3.4.none); beep(1) # residual deviation
res.cors$trace
## [1] 4419.197
# 4419.197
# habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,1]

# Does latent variable 1 track elevation? Pearson = linear, parametric.
# NOTE(review): lv.iqr looks like the posterior interquartile range of the LV
# scores rather than the point estimate (lv.median) -- confirm this is the
# intended quantity for the correlation tests.
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,1], method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 1]
## t = -3.9509, df = 59, p-value = 0.0002106
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6359671 -0.2323392
## sample estimates:
##        cor 
## -0.4573986
# t = -3.9509, df = 59, p-value = 0.0002106
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval: -0.6359671 -0.2323392
# sample estimates: cor -0.4573986
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,1], method = "kendall")
## 
##  Kendall's rank correlation tau
## 
## data:  habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 1]
## z = -4.0139, p-value = 5.973e-05
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##        tau 
## -0.3525554
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,1], method = "spearman")
## Warning in cor.test.default(habitatN$Elevation_m, y =
## commB.fit.b3.4.none$lv.iqr[, : Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 1]
## S = 56743, p-value = 4.022e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5003503
# S = 56743, p-value = 4.022e-05
# alternative hypothesis: true rho is not equal to 0
# sample estimates: rho -0.5003503 

cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,2], method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 2]
## t = -5.0463, df = 59, p-value = 4.603e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7036107 -0.3449532
## sample estimates:
##        cor 
## -0.5490776
# t = -5.0463, df = 59, p-value = 4.603e-06
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval: -0.7036107 -0.3449532
#sample estimates: cor -0.5490776 

# Kendall rank correlation between elevation and latent variable 2
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,2], method = "kendall")
## 
##  Kendall's rank correlation tau
## 
## data:  habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 2]
## z = -4.3997, p-value = 1.084e-05
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##        tau 
## -0.3864444
# Spearman rank correlation between elevation and latent variable 2
# (ties warning expected, as above)
cor.test(habitatN$Elevation_m, y = commB.fit.b3.4.none$lv.iqr[,2], method = "spearman")
## Warning in cor.test.default(habitatN$Elevation_m, y =
## commB.fit.b3.4.none$lv.iqr[, : Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  habitatN$Elevation_m and commB.fit.b3.4.none$lv.iqr[, 2]
## S = 59369, p-value = 1.645e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5697854
# S = 59369, p-value = 1.645e-06
# alternative hypothesis: true rho is not equal to 0
#sample estimates: rho -0.5697854 

Next, I try to interpret the latent variables. The latent variables are clearly correlated with the forest types (habitatN$Habitat) but at best weakly correlated with Simpson_evenness_index, Elevation_m, and weather_value. There does, however, appear to be a quadratic relationship between LV2 and Elevation_m. The correlation plot suggests that some of the Landsat channels are informative.

# Show each site's two scaled latent-variable scores (cols 1-2) next to its
# habitat code (col 3; habitats coerced to their integer factor codes 1-6)
cbind(commB.fit.b3.4.none.ord$scaled.lvs,habitatN$Habitat)
##              [,1]        [,2] [,3]
##  [1,] -2.44609743  0.14120912    1
##  [2,] -2.71739500  0.02594075    1
##  [3,] -1.95262004 -0.50982462    1
##  [4,] -3.18536197  1.38226122    1
##  [5,] -2.35547004 -0.59986978    1
##  [6,] -2.66528779 -1.57655378    1
##  [7,] -1.96043686 -0.41521070    1
##  [8,]  1.29233240 -1.35877897    2
##  [9,]  1.24502413  0.79077191    2
## [10,]  0.97233331  0.67588529    2
## [11,]  1.27739755  0.94241330    2
## [12,]  1.11631926  0.69251491    2
## [13,]  1.59319358  1.06383239    2
## [14,]  1.97665578  0.40647208    2
## [15,]  1.74210233  0.83076662    2
## [16,]  2.05490833  0.66967150    2
## [17,]  3.70022098 -0.34327830    2
## [18,]  3.62047349 -0.03867699    2
## [19,]  4.01380983  0.79084955    2
## [20,]  4.50639852  0.54992883    2
## [21,]  4.12465736  1.68866025    2
## [22,]  4.06900242 -0.48593482    2
## [23,]  0.55520084 -1.65516835    3
## [24,]  0.35413600 -0.73561800    3
## [25,]  0.82499633 -1.12001204    3
## [26,] -0.01387935 -4.21208784    3
## [27,]  0.22575056 -2.32990136    3
## [28,] -0.33700670 -2.78820551    3
## [29,] -0.28875985 -3.09199085    3
## [30,] -0.78772274  1.70052382    4
## [31,] -0.83774256  1.04730697    4
## [32,] -0.46196299  0.62200003    4
## [33,] -0.15737333  0.98320665    4
## [34,] -0.83530198  0.42632406    4
## [35,] -0.53292512  0.24800321    4
## [36,] -1.67801691  4.24030441    4
## [37,] -1.09485419  2.38076134    4
## [38,] -0.05688640  1.66663045    4
## [39,] -0.78108896  1.19630589    4
## [40,] -0.61668994 -0.28198669    5
## [41,] -0.79990450 -0.29730090    5
## [42,] -1.19224050 -0.08209765    5
## [43,] -1.21216052 -0.16997671    5
## [44,] -1.85802182 -0.36629931    5
## [45,] -1.27467902  0.23566789    5
## [46,] -0.38940061 -0.18206862    5
## [47,] -1.12414642  0.15539083    5
## [48,] -0.35005590  0.51321837    5
## [49,] -0.93712748 -0.31671391    6
## [50,] -0.53978252 -0.41119495    6
## [51,] -0.44502825  0.68739927    6
## [52,] -0.55507552 -0.96147362    6
## [53,] -0.79114685 -0.65087306    6
## [54,] -1.10919122 -0.44640189    6
## [55,] -0.81955229 -0.11903023    6
## [56,] -0.57857530  1.12806395    6
## [57,] -0.35134575  0.81193754    6
## [58,] -0.48287375  0.72932460    6
## [59,] -0.78927873  1.09929112    6
## [60,]  0.93308104 -2.21828393    6
## [61,]  1.16447301 -2.75802475    6
par(mfrow = c(1,2))
# Latent variables against forest type: habitats separate clearly
plot(commB.fit.b3.4.none.ord$scaled.lvs[,1]~habitatN$Habitat)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,2]~habitatN$Habitat)

# Latent variables against Simpson evenness: weak relationship at best
plot(commB.fit.b3.4.none.ord$scaled.lvs[,1]~habitatN$Simpson_evenness_index)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,2]~habitatN$Simpson_evenness_index)

# Latent variables against elevation (LV2 looks roughly quadratic in elevation)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,1]~habitatN$Elevation_m)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,2]~habitatN$Elevation_m)

# Latent variables against weather value
plot(commB.fit.b3.4.none.ord$scaled.lvs[,1]~habitatN$weather_value)
plot(commB.fit.b3.4.none.ord$scaled.lvs[,2]~habitatN$weather_value)

# correlation matrix of 2 latent variables (1, 2) with all the Landsat channels. Look only at the two left columns
lvsmatrix <- habitatN %>% dplyr::select(starts_with("x"))  # presumably the Landsat channel columns all start with "x" -- TODO confirm
lvsmatrix <- cbind(commB.fit.b3.4.none.ord$scaled.lvs, lvsmatrix)
lvsmatrix_cor <- cor(lvsmatrix)
par(mfrow = c(1,1))
# NOTE(review): corrplot() needs the corrplot package, which is not among the
# libraries loaded at the top of this document -- confirm library(corrplot)
# is called earlier in the file
corrplot(lvsmatrix_cor, type = "lower", tl.pos = "l", tl.cex = 0.5, sig.level = 0.05, insig = "blank")

mvabund

Here I test whether I can combine levels within habitatN$Habitat. I use mvabund to test whether MF and NF are significantly different from each other and from the other habitats: BB, EC, JC, and CL. I do this by creating two models: one with all 6 levels, and one in which NF (or MF) is combined with one of the other habitats. I fit both models with mvabund and compare them with anova(); if the models differ significantly, then the two combined habitat types are significantly different. Finally, I conservatively adjust the p-values for all 15 possible pairwise comparisons: (6 × 5)/2 = 15.

anova.manyglm options: test = "score", because the anova.manyglm help file says that test = "wald" performs poorly for binomial data under some conditions, making "score" the better alternative; and cor.type = "shrink", which estimates the correlations between species in an efficient (shrinkage-regularized) way — necessary for a dataset like ours with many more species than samples.

# communityB <- communityB[ , 1:150]  # comment away if I want to use the whole dataset.  this is to produce a partial dataset for debugging
communityB.mvb <- mvabund(communityB) 

#nboot <- 499 # set to 20 for debugging (~25 mins for nboot = 499)
nboot <- 999 # set to 999 for publication


# base model with no levels combined
# binomial family -- assumes communityB holds presence/absence data (TODO confirm)
mod_BBCLECJCMFNF.mvb <- manyglm(communityB.mvb ~ Habitat, data = habitatN, family = binomial("cloglog"))
plot(mod_BBCLECJCMFNF.mvb)  # residual diagnostics for the fitted manyglm

anova(mod_BBCLECJCMFNF.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # printed run: p = 0.001, df = 5, score = 538.5 (an earlier run gave 534.8), nBoot = 999, ~14 mins.  The Habitat predictor has a sig effect
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.01 minutes...
##  Resampling run 100 finished. Time elapsed: 1.46 minutes...
##  Resampling run 200 finished. Time elapsed: 2.87 minutes...
##  Resampling run 300 finished. Time elapsed: 4.30 minutes...
##  Resampling run 400 finished. Time elapsed: 5.72 minutes...
##  Resampling run 500 finished. Time elapsed: 7.14 minutes...
##  Resampling run 600 finished. Time elapsed: 8.59 minutes...
##  Resampling run 700 finished. Time elapsed: 10.01 minutes...
##  Resampling run 800 finished. Time elapsed: 11.44 minutes...
##  Resampling run 900 finished. Time elapsed: 12.86 minutes...
## Time elapsed: 0 hr 14 min 15 sec
## Analysis of Variance Table
## 
## Model: manyglm(formula = communityB.mvb ~ Habitat, family = binomial("cloglog"), 
## Model:     data = habitatN)
## 
## Multivariate test:
##             Res.Df Df.diff score Pr(>score)    
## (Intercept)     60                             
## Habitat         55       5 538.5      0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and MF into one level, then compare the reduced model against the
# full 6-level model: a significant difference means MF and NF differ
habitatN$HabitatMFNF <- fct_collapse(habitatN$Habitat, MFNF = c("MF", "NF"))
fct_count(habitatN$HabitatMFNF)  # sanity check the collapsed level counts
## # A tibble: 5 x 2
##   f         n
##   <fct> <int>
## 1 BB        7
## 2 CL       15
## 3 EC        7
## 4 JC       10
## 5 MFNF     22
mod_CLBBECJC_MFNF.mvb <- manyglm(communityB.mvb ~ HabitatMFNF, data = habitatN, family = binomial("cloglog"))
plot(mod_CLBBECJC_MFNF.mvb)

anova(mod_BBCLECJCMFNF.mvb, mod_CLBBECJC_MFNF.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot) # printed run: p = 0.009, df = 1, score = 57.34 (an earlier run gave p = 0.001, score = 55.42), nBoot = 999.  MF and NF are sig diff
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.01 minutes...
##  Resampling run 100 finished. Time elapsed: 1.51 minutes...
##  Resampling run 200 finished. Time elapsed: 3.02 minutes...
##  Resampling run 300 finished. Time elapsed: 4.52 minutes...
##  Resampling run 400 finished. Time elapsed: 6.06 minutes...
##  Resampling run 500 finished. Time elapsed: 7.53 minutes...
##  Resampling run 600 finished. Time elapsed: 8.99 minutes...
##  Resampling run 700 finished. Time elapsed: 10.51 minutes...
##  Resampling run 800 finished. Time elapsed: 12.08 minutes...
##  Resampling run 900 finished. Time elapsed: 13.62 minutes...
## Time elapsed: 0 hr 15 min 8 sec
## Analysis of Variance Table
## 
## mod_CLBBECJC_MFNF.mvb: communityB.mvb ~ HabitatMFNF
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
## 
## Multivariate test:
##                       Res.Df Df.diff score Pr(>score)   
## mod_CLBBECJC_MFNF.mvb     56                            
## mod_BBCLECJCMFNF.mvb      55       1 57.34      0.009 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and BB into one level and compare against the full model (tests NF vs BB)
habitatN$HabitatNFBB <- fct_collapse(habitatN$Habitat, NFBB = c("NF", "BB"))
fct_count(habitatN$HabitatNFBB)  # sanity check the collapsed level counts
## # A tibble: 5 x 2
##   f         n
##   <fct> <int>
## 1 NFBB     20
## 2 CL       15
## 3 EC        7
## 4 JC       10
## 5 MF        9
mod_CLECJCMF_NFBB.mvb <- manyglm(communityB.mvb ~ HabitatNFBB, data = habitatN, family = binomial("cloglog"))
plot(mod_CLECJCMF_NFBB.mvb)

anova(mod_BBCLECJCMFNF.mvb, mod_CLECJCMF_NFBB.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # p = 0.001, df = 1, score = 54.88, nBoot = 999, ~15 mins.  BB and NF are sig diff
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.02 minutes...
##  Resampling run 100 finished. Time elapsed: 1.55 minutes...
##  Resampling run 200 finished. Time elapsed: 3.11 minutes...
##  Resampling run 300 finished. Time elapsed: 4.65 minutes...
##  Resampling run 400 finished. Time elapsed: 6.18 minutes...
##  Resampling run 500 finished. Time elapsed: 7.71 minutes...
##  Resampling run 600 finished. Time elapsed: 9.24 minutes...
##  Resampling run 700 finished. Time elapsed: 10.76 minutes...
##  Resampling run 800 finished. Time elapsed: 12.30 minutes...
##  Resampling run 900 finished. Time elapsed: 13.82 minutes...
## Time elapsed: 0 hr 15 min 20 sec
## Analysis of Variance Table
## 
## mod_CLECJCMF_NFBB.mvb: communityB.mvb ~ HabitatNFBB
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
## 
## Multivariate test:
##                       Res.Df Df.diff score Pr(>score)    
## mod_CLECJCMF_NFBB.mvb     56                             
## mod_BBCLECJCMFNF.mvb      55       1 54.88      0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and JC into one level and compare against the full model (tests NF vs JC)
habitatN$HabitatNFJC <- fct_collapse(habitatN$Habitat, NFJC = c("NF", "JC"))
fct_count(habitatN$HabitatNFJC)  # sanity check the collapsed level counts
## # A tibble: 5 x 2
##   f         n
##   <fct> <int>
## 1 BB        7
## 2 CL       15
## 3 EC        7
## 4 NFJC     23
## 5 MF        9
mod_CLECBBMF_NFJC.mvb <- manyglm(communityB.mvb ~ HabitatNFJC, data = habitatN, family = binomial("cloglog"))
plot(mod_CLECBBMF_NFJC.mvb)

anova(mod_BBCLECJCMFNF.mvb, mod_CLECBBMF_NFJC.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # p = 0.001, df = 1, score = 58.09, nBoot = 999, ~15 mins.  JC and NF are sig diff
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.02 minutes...
##  Resampling run 100 finished. Time elapsed: 1.55 minutes...
##  Resampling run 200 finished. Time elapsed: 3.07 minutes...
##  Resampling run 300 finished. Time elapsed: 4.60 minutes...
##  Resampling run 400 finished. Time elapsed: 6.14 minutes...
##  Resampling run 500 finished. Time elapsed: 7.66 minutes...
##  Resampling run 600 finished. Time elapsed: 9.19 minutes...
##  Resampling run 700 finished. Time elapsed: 10.71 minutes...
##  Resampling run 800 finished. Time elapsed: 12.24 minutes...
##  Resampling run 900 finished. Time elapsed: 13.77 minutes...
## Time elapsed: 0 hr 15 min 17 sec
## Analysis of Variance Table
## 
## mod_CLECBBMF_NFJC.mvb: communityB.mvb ~ HabitatNFJC
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
## 
## Multivariate test:
##                       Res.Df Df.diff score Pr(>score)    
## mod_CLECBBMF_NFJC.mvb     56                             
## mod_BBCLECJCMFNF.mvb      55       1 58.09      0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and EC into one level and compare against the full model (tests NF vs EC)
habitatN$HabitatNFEC <- fct_collapse(habitatN$Habitat, NFEC = c("NF", "EC"))
fct_count(habitatN$HabitatNFEC)  # sanity check the collapsed level counts
## # A tibble: 5 x 2
##   f         n
##   <fct> <int>
## 1 BB        7
## 2 CL       15
## 3 NFEC     20
## 4 JC       10
## 5 MF        9
mod_CLBBJCMF_NFEC.mvb <- manyglm(communityB.mvb ~ HabitatNFEC, data = habitatN, family = binomial("cloglog"))
plot(mod_CLBBJCMF_NFEC.mvb)

anova(mod_BBCLECJCMFNF.mvb, mod_CLBBJCMF_NFEC.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # printed run: p = 0.003, df = 1, score = 50.63 (an earlier run gave 54.41), nBoot = 999, ~15 mins.  EC and NF are sig diff
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.02 minutes...
##  Resampling run 100 finished. Time elapsed: 1.54 minutes...
##  Resampling run 200 finished. Time elapsed: 3.07 minutes...
##  Resampling run 300 finished. Time elapsed: 4.59 minutes...
##  Resampling run 400 finished. Time elapsed: 6.11 minutes...
##  Resampling run 500 finished. Time elapsed: 7.64 minutes...
##  Resampling run 600 finished. Time elapsed: 9.16 minutes...
##  Resampling run 700 finished. Time elapsed: 10.69 minutes...
##  Resampling run 800 finished. Time elapsed: 12.22 minutes...
##  Resampling run 900 finished. Time elapsed: 13.74 minutes...
## Time elapsed: 0 hr 15 min 15 sec
## Analysis of Variance Table
## 
## mod_CLBBJCMF_NFEC.mvb: communityB.mvb ~ HabitatNFEC
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
## 
## Multivariate test:
##                       Res.Df Df.diff score Pr(>score)   
## mod_CLBBJCMF_NFEC.mvb     56                            
## mod_BBCLECJCMFNF.mvb      55       1 50.63      0.003 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine NF and CL into one level and compare against the full model (tests NF vs CL)
habitatN$HabitatNFCL <- fct_collapse(habitatN$Habitat, NFCL = c("NF", "CL"))
fct_count(habitatN$HabitatNFCL)  # sanity check the collapsed level counts
## # A tibble: 5 x 2
##   f         n
##   <fct> <int>
## 1 BB        7
## 2 NFCL     28
## 3 EC        7
## 4 JC       10
## 5 MF        9
mod_ECBBJCMF_NFCL.mvb <- manyglm(communityB.mvb ~ HabitatNFCL, data = habitatN, family = binomial("cloglog"))
plot(mod_ECBBJCMF_NFCL.mvb)

anova(mod_BBCLECJCMFNF.mvb, mod_ECBBJCMF_NFCL.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # printed run: p = 0.001, df = 1, score = 77.19 (an earlier run gave 64.25), nBoot = 999, ~15 mins.  CL and NF are sig diff
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.02 minutes...
##  Resampling run 100 finished. Time elapsed: 1.53 minutes...
##  Resampling run 200 finished. Time elapsed: 3.04 minutes...
##  Resampling run 300 finished. Time elapsed: 4.56 minutes...
##  Resampling run 400 finished. Time elapsed: 6.07 minutes...
##  Resampling run 500 finished. Time elapsed: 7.58 minutes...
##  Resampling run 600 finished. Time elapsed: 9.09 minutes...
##  Resampling run 700 finished. Time elapsed: 10.60 minutes...
##  Resampling run 800 finished. Time elapsed: 12.11 minutes...
##  Resampling run 900 finished. Time elapsed: 13.62 minutes...
## Time elapsed: 0 hr 15 min 6 sec
## Analysis of Variance Table
## 
## mod_ECBBJCMF_NFCL.mvb: communityB.mvb ~ HabitatNFCL
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
## 
## Multivariate test:
##                       Res.Df Df.diff score Pr(>score)    
## mod_ECBBJCMF_NFCL.mvb     56                             
## mod_BBCLECJCMFNF.mvb      55       1 77.19      0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# Table-wide FDR correction, under the assumption that there are 15 possible
# pairwise comparisons among the 6 forest types: (6*5)/2 = 15.
# The p-values are taken from the five printed anova outputs above (NF|MF
# combined with MF, BB, JC, EC, CL respectively). Note the MF-vs-NF test
# printed p = 0.009, not the 0.001 from an earlier run.
pvalues <- c(0.009, 0.001, 0.001, 0.003, 0.001)
# Use n = 15 (all possible comparisons), not n = length(pvalues) = 5, so the
# correction actually matches the stated table-wide assumption; this is the
# conservative choice described in the text above.
pvalues.corr.fdr <- p.adjust(pvalues, method = "fdr", n = 15)
pvalues.corr.fdr
# [1] 0.02700 0.00500 0.00500 0.01125 0.00500  -- all remain significant at 0.05
# communityB <- communityB[ , 1:150]  # comment away if I want to use the whole dataset.  this is to produce a partial dataset for debugging
communityB.mvb <- mvabund(communityB) 

# nboot <- 499 # set to 20 for debugging (~ 20 mins for 499)
nboot <- 999 # set to 999 for publication

# base model with no levels combined:  mod_BBCLECJCMFNF.mvb

# combine NF and MF:  already done above

# combine MF and BB into one level and compare against the full model (tests MF vs BB)
habitatN$HabitatMFBB <- fct_collapse(habitatN$Habitat, MFBB = c("MF", "BB"))
fct_count(habitatN$HabitatMFBB)  # sanity check the collapsed level counts
## # A tibble: 5 x 2
##   f         n
##   <fct> <int>
## 1 MFBB     16
## 2 CL       15
## 3 EC        7
## 4 JC       10
## 5 NF       13
mod_CLECJCNF_MFBB.mvb <- manyglm(communityB.mvb ~ HabitatMFBB, data = habitatN, family = binomial("cloglog"))
plot(mod_CLECJCNF_MFBB.mvb)

anova(mod_BBCLECJCMFNF.mvb, mod_CLECJCNF_MFBB.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # printed run: p = 0.002, df = 1, score = 52.97 (an earlier run gave p = 0.001, score = 54.23), nBoot = 999, ~15 mins.  BB and MF are sig diff
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.02 minutes...
##  Resampling run 100 finished. Time elapsed: 1.53 minutes...
##  Resampling run 200 finished. Time elapsed: 3.04 minutes...
##  Resampling run 300 finished. Time elapsed: 4.55 minutes...
##  Resampling run 400 finished. Time elapsed: 6.06 minutes...
##  Resampling run 500 finished. Time elapsed: 7.57 minutes...
##  Resampling run 600 finished. Time elapsed: 9.08 minutes...
##  Resampling run 700 finished. Time elapsed: 10.59 minutes...
##  Resampling run 800 finished. Time elapsed: 12.10 minutes...
##  Resampling run 900 finished. Time elapsed: 13.61 minutes...
## Time elapsed: 0 hr 15 min 6 sec
## Analysis of Variance Table
## 
## mod_CLECJCNF_MFBB.mvb: communityB.mvb ~ HabitatMFBB
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
## 
## Multivariate test:
##                       Res.Df Df.diff score Pr(>score)   
## mod_CLECJCNF_MFBB.mvb     56                            
## mod_BBCLECJCMFNF.mvb      55       1 52.97      0.002 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine MF and JC into one level and compare against the full model (tests MF vs JC)
habitatN$HabitatMFJC <- fct_collapse(habitatN$Habitat, MFJC = c("MF", "JC"))
fct_count(habitatN$HabitatMFJC)  # sanity check the collapsed level counts
## # A tibble: 5 x 2
##   f         n
##   <fct> <int>
## 1 BB        7
## 2 CL       15
## 3 EC        7
## 4 MFJC     19
## 5 NF       13
mod_CLECBBNF_MFJC.mvb <- manyglm(communityB.mvb ~ HabitatMFJC, data = habitatN, family = binomial("cloglog"))
plot(mod_CLECBBNF_MFJC.mvb)

anova(mod_BBCLECJCMFNF.mvb, mod_CLECBBNF_MFJC.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # printed run: p = 0.001, df = 1, score = 57.82 (an earlier run gave 58.73), nBoot = 999, ~15 mins.  JC and MF are sig diff
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.02 minutes...
##  Resampling run 100 finished. Time elapsed: 1.53 minutes...
##  Resampling run 200 finished. Time elapsed: 3.04 minutes...
##  Resampling run 300 finished. Time elapsed: 4.55 minutes...
##  Resampling run 400 finished. Time elapsed: 6.06 minutes...
##  Resampling run 500 finished. Time elapsed: 7.58 minutes...
##  Resampling run 600 finished. Time elapsed: 9.09 minutes...
##  Resampling run 700 finished. Time elapsed: 10.60 minutes...
##  Resampling run 800 finished. Time elapsed: 12.12 minutes...
##  Resampling run 900 finished. Time elapsed: 13.63 minutes...
## Time elapsed: 0 hr 15 min 7 sec
## Analysis of Variance Table
## 
## mod_CLECBBNF_MFJC.mvb: communityB.mvb ~ HabitatMFJC
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
## 
## Multivariate test:
##                       Res.Df Df.diff score Pr(>score)    
## mod_CLECBBNF_MFJC.mvb     56                             
## mod_BBCLECJCMFNF.mvb      55       1 57.82      0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine MF and EC into one level and compare against the full model (tests MF vs EC)
habitatN$HabitatMFEC <- fct_collapse(habitatN$Habitat, MFEC = c("MF", "EC"))
fct_count(habitatN$HabitatMFEC)  # sanity check the collapsed level counts
## # A tibble: 5 x 2
##   f         n
##   <fct> <int>
## 1 BB        7
## 2 CL       15
## 3 MFEC     16
## 4 JC       10
## 5 NF       13
mod_CLBBJCNF_MFEC.mvb <- manyglm(communityB.mvb ~ HabitatMFEC, data = habitatN, family = binomial("cloglog"))
plot(mod_CLBBJCNF_MFEC.mvb)

anova(mod_BBCLECJCMFNF.mvb, mod_CLBBJCNF_MFEC.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # printed run: p = 0.001, df = 1, score = 55.93 (an earlier run gave 55.29), nBoot = 999, ~15 mins.  EC and MF are sig diff
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.02 minutes...
##  Resampling run 100 finished. Time elapsed: 1.53 minutes...
##  Resampling run 200 finished. Time elapsed: 3.05 minutes...
##  Resampling run 300 finished. Time elapsed: 4.56 minutes...
##  Resampling run 400 finished. Time elapsed: 6.08 minutes...
##  Resampling run 500 finished. Time elapsed: 7.59 minutes...
##  Resampling run 600 finished. Time elapsed: 9.10 minutes...
##  Resampling run 700 finished. Time elapsed: 10.61 minutes...
##  Resampling run 800 finished. Time elapsed: 12.13 minutes...
##  Resampling run 900 finished. Time elapsed: 13.64 minutes...
## Time elapsed: 0 hr 15 min 8 sec
## Analysis of Variance Table
## 
## mod_CLBBJCNF_MFEC.mvb: communityB.mvb ~ HabitatMFEC
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
## 
## Multivariate test:
##                       Res.Df Df.diff score Pr(>score)    
## mod_CLBBJCNF_MFEC.mvb     56                             
## mod_BBCLECJCMFNF.mvb      55       1 55.93      0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# combine MF and CL into one level and compare against the full model (tests MF vs CL)
habitatN$HabitatMFCL <- fct_collapse(habitatN$Habitat, MFCL = c("MF", "CL"))
fct_count(habitatN$HabitatMFCL)  # sanity check the collapsed level counts
## # A tibble: 5 x 2
##   f         n
##   <fct> <int>
## 1 BB        7
## 2 MFCL     24
## 3 EC        7
## 4 JC       10
## 5 NF       13
mod_ECBBJCNF_MFCL.mvb <- manyglm(communityB.mvb ~ HabitatMFCL, data = habitatN, family = binomial("cloglog"))
plot(mod_ECBBJCNF_MFCL.mvb)

anova(mod_BBCLECJCMFNF.mvb, mod_ECBBJCNF_MFCL.mvb, cor.type = "shrink", test = "score", show.time = "all", nBoot = nboot)  # printed run: p = 0.001, df = 1, score = 72.55 (an earlier run gave 71.41), nBoot = 999, ~15 mins.  CL and MF are sig diff
## Resampling begins for test 1.
##  Resampling run 0 finished. Time elapsed: 0.02 minutes...
##  Resampling run 100 finished. Time elapsed: 1.53 minutes...
##  Resampling run 200 finished. Time elapsed: 3.03 minutes...
##  Resampling run 300 finished. Time elapsed: 4.54 minutes...
##  Resampling run 400 finished. Time elapsed: 6.06 minutes...
##  Resampling run 500 finished. Time elapsed: 7.58 minutes...
##  Resampling run 600 finished. Time elapsed: 9.09 minutes...
##  Resampling run 700 finished. Time elapsed: 10.60 minutes...
##  Resampling run 800 finished. Time elapsed: 12.12 minutes...
##  Resampling run 900 finished. Time elapsed: 13.63 minutes...
## Time elapsed: 0 hr 15 min 7 sec
## Analysis of Variance Table
## 
## mod_ECBBJCNF_MFCL.mvb: communityB.mvb ~ HabitatMFCL
## mod_BBCLECJCMFNF.mvb: communityB.mvb ~ Habitat
## 
## Multivariate test:
##                       Res.Df Df.diff score Pr(>score)    
## mod_ECBBJCNF_MFCL.mvb     56                             
## mod_BBCLECJCMFNF.mvb      55       1 72.55      0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Arguments:
##  Test statistics calculated assuming correlated response via ridge regularization 
##  P-value calculated using 999 resampling iterations via PIT-trap resampling (to account for correlation in testing).
# Table-wide FDR correction, under the assumption that there are 15 possible
# pairwise comparisons among the 6 forest types: (6*5)/2 = 15 (the "30" in an
# earlier draft of this comment contradicted the formula and was a typo).
# The p-values are taken from the four printed anova outputs above (MF combined
# with BB, JC, EC, CL respectively; MF vs NF was already tested in the first
# batch). Note the MF-vs-BB test printed p = 0.002, not 0.001.
pvalues <- c(0.002, 0.001, 0.001, 0.001)
# Use n = 15 (all possible comparisons), not n = length(pvalues) = 4, so the
# correction matches the stated table-wide assumption.
pvalues.corr.fdr <- p.adjust(pvalues, method = "fdr", n = 15)
pvalues.corr.fdr
# [1] 0.0075 0.0050 0.0050 0.0050  -- all remain significant at 0.05

# pvalues.corr.fdr<-p.adjust(pvalues, method = "fdr", n = length(pvalues))